From ca7784e7731eb9e9827df986ce4a71afc25eca4b Mon Sep 17 00:00:00 2001 From: "cl349@firebug.cl.cam.ac.uk" Date: Fri, 11 Mar 2005 00:16:05 +0000 Subject: [PATCH] bitkeeper revision 1.1159.268.1 (4230e3456i1RsGKcSPg-xYttkmeJEw) Update to Linux 2.6.11. Signed-off-by: Christian Limpach --- .rootkeys | 252 ++- .../include/linux/skbuff.h | 1189 ------------- linux-2.6.10-xen-sparse/net/core/skbuff.c | 1521 ----------------- .../arch/xen/Kconfig | 0 .../arch/xen/Kconfig.drivers | 0 .../arch/xen/Makefile | 0 .../arch/xen/boot/Makefile | 0 .../arch/xen/configs/xen0_defconfig | 0 .../arch/xen/configs/xenU_defconfig | 0 .../arch/xen/i386/Kconfig | 19 +- .../arch/xen/i386/Makefile | 0 .../arch/xen/i386/kernel/Makefile | 0 .../arch/xen/i386/kernel/cpu/Makefile | 0 .../arch/xen/i386/kernel/cpu/common.c | 89 +- .../arch/xen/i386/kernel/cpu/mtrr/Makefile | 0 .../arch/xen/i386/kernel/cpu/mtrr/main.c | 0 .../arch/xen/i386/kernel/entry.S | 7 +- .../arch/xen/i386/kernel/head.S | 0 .../arch/xen/i386/kernel/i386_ksyms.c | 2 - .../arch/xen/i386/kernel/ioport.c | 0 .../arch/xen/i386/kernel/ldt.c | 0 .../arch/xen/i386/kernel/microcode.c | 0 .../arch/xen/i386/kernel/pci-dma.c | 12 +- .../arch/xen/i386/kernel/process.c | 35 +- .../arch/xen/i386/kernel/setup.c | 84 +- .../arch/xen/i386/kernel/signal.c | 44 +- .../arch/xen/i386/kernel/time.c | 10 +- .../arch/xen/i386/kernel/timers/Makefile | 0 .../arch/xen/i386/kernel/timers/timer_tsc.c | 0 .../arch/xen/i386/kernel/traps.c | 47 +- .../arch/xen/i386/kernel/vsyscall.S | 0 .../arch/xen/i386/kernel/vsyscall.lds | 0 .../arch/xen/i386/mm/Makefile | 0 .../arch/xen/i386/mm/fault.c | 17 +- .../arch/xen/i386/mm/highmem.c | 6 +- .../arch/xen/i386/mm/hypervisor.c | 12 +- .../arch/xen/i386/mm/init.c | 26 +- .../arch/xen/i386/mm/ioremap.c | 17 +- .../arch/xen/i386/mm/pageattr.c | 49 +- .../arch/xen/i386/mm/pgtable.c | 45 +- .../arch/xen/i386/pci/Makefile | 0 .../arch/xen/i386/pci/direct.c | 0 .../arch/xen/i386/pci/irq.c | 4 +- .../arch/xen/kernel/Makefile | 
0 .../arch/xen/kernel/ctrl_if.c | 0 .../arch/xen/kernel/devmem.c | 0 .../arch/xen/kernel/evtchn.c | 0 .../arch/xen/kernel/fixup.c | 0 .../arch/xen/kernel/gnttab.c | 0 .../arch/xen/kernel/reboot.c | 0 .../arch/xen/kernel/skbuff.c | 0 .../arch/xen/kernel/xen_proc.c | 0 .../drivers/Makefile | 3 + .../drivers/char/mem.c | 0 .../drivers/char/tty_io.c | 87 +- .../drivers/xen/Makefile | 0 .../drivers/xen/balloon/Makefile | 0 .../drivers/xen/balloon/balloon.c | 6 +- .../drivers/xen/blkback/Makefile | 0 .../drivers/xen/blkback/blkback.c | 0 .../drivers/xen/blkback/common.h | 0 .../drivers/xen/blkback/control.c | 0 .../drivers/xen/blkback/interface.c | 0 .../drivers/xen/blkback/vbd.c | 0 .../drivers/xen/blkfront/Kconfig | 0 .../drivers/xen/blkfront/Makefile | 0 .../drivers/xen/blkfront/blkfront.c | 0 .../drivers/xen/blkfront/block.h | 0 .../drivers/xen/blkfront/vbd.c | 0 .../drivers/xen/console/Makefile | 0 .../drivers/xen/console/console.c | 0 .../drivers/xen/evtchn/Makefile | 0 .../drivers/xen/evtchn/evtchn.c | 0 .../drivers/xen/netback/Makefile | 0 .../drivers/xen/netback/common.h | 0 .../drivers/xen/netback/control.c | 0 .../drivers/xen/netback/interface.c | 0 .../drivers/xen/netback/netback.c | 0 .../drivers/xen/netfront/Kconfig | 0 .../drivers/xen/netfront/Makefile | 0 .../drivers/xen/netfront/netfront.c | 0 .../drivers/xen/privcmd/Makefile | 0 .../drivers/xen/privcmd/privcmd.c | 0 .../include/asm-generic/pgtable.h | 0 .../include/asm-xen/asm-i386/desc.h | 9 + .../include/asm-xen/asm-i386/dma-mapping.h | 0 .../include/asm-xen/asm-i386/fixmap.h | 4 +- .../include/asm-xen/asm-i386/floppy.h | 0 .../include/asm-xen/asm-i386/highmem.h | 21 +- .../include/asm-xen/asm-i386/io.h | 0 .../asm-xen/asm-i386/mach-xen/irq_vectors.h | 0 .../asm-i386/mach-xen/setup_arch_post.h | 0 .../asm-i386/mach-xen/setup_arch_pre.h | 0 .../include/asm-xen/asm-i386/mmu_context.h | 0 .../include/asm-xen/asm-i386/msr.h | 0 .../include/asm-xen/asm-i386/page.h | 16 +- 
.../include/asm-xen/asm-i386/param.h | 0 .../include/asm-xen/asm-i386/pci.h | 0 .../include/asm-xen/asm-i386/pgalloc.h | 23 +- .../asm-xen/asm-i386/pgtable-2level-defs.h | 2 - .../include/asm-xen/asm-i386/pgtable-2level.h | 34 +- .../include/asm-xen/asm-i386/pgtable.h | 19 +- .../include/asm-xen/asm-i386/processor.h | 20 +- .../include/asm-xen/asm-i386/ptrace.h | 2 + .../include/asm-xen/asm-i386/segment.h | 0 .../include/asm-xen/asm-i386/setup.h | 0 .../include/asm-xen/asm-i386/synch_bitops.h | 0 .../include/asm-xen/asm-i386/system.h | 1 + .../include/asm-xen/asm-i386/tlbflush.h | 0 .../include/asm-xen/asm-i386/vga.h | 0 .../include/asm-xen/asm-i386/xor.h | 0 .../include/asm-xen/balloon.h | 0 .../include/asm-xen/ctrl_if.h | 0 .../include/asm-xen/evtchn.h | 0 .../include/asm-xen/foreign_page.h | 0 .../include/asm-xen/gnttab.h | 0 .../include/asm-xen/hypervisor.h | 5 +- .../include/asm-xen/linux-public/privcmd.h | 0 .../include/asm-xen/linux-public/suspend.h | 0 .../include/asm-xen/multicall.h | 0 .../include/asm-xen/queues.h | 0 .../include/asm-xen/xen_proc.h | 0 .../include/linux/gfp.h | 1 + .../include/linux/irq.h | 3 +- .../kernel/irq/manage.c | 2 + .../mkbuildtree | 0 .../mm/memory.c | 1104 ++++++++---- .../mm/page_alloc.c | 638 +++---- 128 files changed, 1744 insertions(+), 3743 deletions(-) delete mode 100644 linux-2.6.10-xen-sparse/include/linux/skbuff.h delete mode 100644 linux-2.6.10-xen-sparse/net/core/skbuff.c rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/Kconfig (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/Kconfig.drivers (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/boot/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/configs/xen0_defconfig (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/configs/xenU_defconfig (100%) rename 
{linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/Kconfig (98%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/cpu/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/cpu/common.c (89%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/cpu/mtrr/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/cpu/mtrr/main.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/entry.S (99%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/head.S (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/i386_ksyms.c (98%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/ioport.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/ldt.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/microcode.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/pci-dma.c (93%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/process.c (97%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/setup.c (96%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/signal.c (95%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/time.c (99%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/timers/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/timers/timer_tsc.c (100%) rename {linux-2.6.10-xen-sparse => 
linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/traps.c (95%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/vsyscall.S (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/kernel/vsyscall.lds (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/mm/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/mm/fault.c (97%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/mm/highmem.c (96%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/mm/hypervisor.c (96%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/mm/init.c (97%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/mm/ioremap.c (96%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/mm/pageattr.c (84%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/mm/pgtable.c (90%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/pci/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/pci/direct.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/i386/pci/irq.c (97%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/kernel/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/kernel/ctrl_if.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/kernel/devmem.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/kernel/evtchn.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/kernel/fixup.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/kernel/gnttab.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/kernel/reboot.c (100%) rename {linux-2.6.10-xen-sparse => 
linux-2.6.11-xen-sparse}/arch/xen/kernel/skbuff.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/arch/xen/kernel/xen_proc.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/Makefile (94%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/char/mem.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/char/tty_io.c (98%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/balloon/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/balloon/balloon.c (98%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkback/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkback/blkback.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkback/common.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkback/control.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkback/interface.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkback/vbd.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkfront/Kconfig (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkfront/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkfront/blkfront.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkfront/block.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/blkfront/vbd.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/console/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/console/console.c (100%) rename {linux-2.6.10-xen-sparse => 
linux-2.6.11-xen-sparse}/drivers/xen/evtchn/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/evtchn/evtchn.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/netback/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/netback/common.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/netback/control.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/netback/interface.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/netback/netback.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/netfront/Kconfig (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/netfront/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/netfront/netfront.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/privcmd/Makefile (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/drivers/xen/privcmd/privcmd.c (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-generic/pgtable.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/desc.h (94%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/dma-mapping.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/fixmap.h (96%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/floppy.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/highmem.h (85%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/io.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/mach-xen/irq_vectors.h (100%) rename {linux-2.6.10-xen-sparse => 
linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/mmu_context.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/msr.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/page.h (93%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/param.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/pci.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/pgalloc.h (70%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/pgtable-2level-defs.h (90%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/pgtable-2level.h (83%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/pgtable.h (97%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/processor.h (97%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/ptrace.h (92%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/segment.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/setup.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/synch_bitops.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/system.h (99%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/tlbflush.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/vga.h (100%) rename {linux-2.6.10-xen-sparse => 
linux-2.6.11-xen-sparse}/include/asm-xen/asm-i386/xor.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/balloon.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/ctrl_if.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/evtchn.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/foreign_page.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/gnttab.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/hypervisor.h (98%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/linux-public/privcmd.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/linux-public/suspend.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/multicall.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/queues.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/asm-xen/xen_proc.h (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/linux/gfp.h (98%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/include/linux/irq.h (99%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/kernel/irq/manage.c (99%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/mkbuildtree (100%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/mm/memory.c (66%) rename {linux-2.6.10-xen-sparse => linux-2.6.11-xen-sparse}/mm/page_alloc.c (80%) diff --git a/.rootkeys b/.rootkeys index d15fe9d75d..8e29f01aa4 100644 --- a/.rootkeys +++ b/.rootkeys @@ -127,133 +127,131 @@ 3e5a4e683HKVU-sxtagrDasRB8eBVw linux-2.4.29-xen-sparse/mm/swapfile.c 41180721bNns9Na7w1nJ0ZVt8bhUNA linux-2.4.29-xen-sparse/mm/vmalloc.c 41505c57WAd5l1rlfCLNSCpx9J13vA linux-2.4.29-xen-sparse/net/core/skbuff.c -40f562372u3A7_kfbYYixPHJJxYUxA 
linux-2.6.10-xen-sparse/arch/xen/Kconfig -40f56237utH41NPukqHksuNf29IC9A linux-2.6.10-xen-sparse/arch/xen/Kconfig.drivers -40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.10-xen-sparse/arch/xen/Makefile -40f56237JTc60m1FRlUxkUaGSQKrNw linux-2.6.10-xen-sparse/arch/xen/boot/Makefile -40f56237hRxbacU_3PdoAl6DjZ3Jnw linux-2.6.10-xen-sparse/arch/xen/configs/xen0_defconfig -40f56237wubfjJKlfIzZlI3ZM2VgGA linux-2.6.10-xen-sparse/arch/xen/configs/xenU_defconfig -40f56237Mta0yHNaMS_qtM2rge0qYA linux-2.6.10-xen-sparse/arch/xen/i386/Kconfig -40f56238u2CJdXNpjsZgHBxeVyY-2g linux-2.6.10-xen-sparse/arch/xen/i386/Makefile -40f56238eczveJ86k_4hNxCLRQIF-g linux-2.6.10-xen-sparse/arch/xen/i386/kernel/Makefile -40f56238rXVTJQKbBuXXLH52qEArcg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/Makefile -40f562385s4lr6Zg92gExe7UQ4A76Q linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/common.c -41ab440bnpxZdWShZrGgM9pPaz5rmA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile -41ab440bBKWz-aEOEojU4PAMXe3Ppg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c -40f56238XDtHSijkAFlbv1PT8Bhw_Q linux-2.6.10-xen-sparse/arch/xen/i386/kernel/entry.S -40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.10-xen-sparse/arch/xen/i386/kernel/head.S -40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c -40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ioport.c -40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ldt.c -41d54a76YMCA67S8J-TBT3J62Wx6yA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/microcode.c -4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/pci-dma.c -40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c -40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/setup.c -40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/signal.c -40f56238qVGkpO_ycnQA8k03kQzAgA 
linux-2.6.10-xen-sparse/arch/xen/i386/kernel/time.c -40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/Makefile -40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c -40f562389xNa78YBZciUibQjyRU_Lg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c -40f56238JypKAUG01ZojFwH7qnZ5uA linux-2.6.10-xen-sparse/arch/xen/i386/kernel/vsyscall.S -40f56238wi6AdNQjm0RT57bSkwb6hg linux-2.6.10-xen-sparse/arch/xen/i386/kernel/vsyscall.lds -40f56238a3w6-byOzexIlMgni76Lcg linux-2.6.10-xen-sparse/arch/xen/i386/mm/Makefile -40f56238ILx8xlbywNbzTdv5Zr4xXQ linux-2.6.10-xen-sparse/arch/xen/i386/mm/fault.c -4118cc35CbY8rfGVspF5O-7EkXBEAA linux-2.6.10-xen-sparse/arch/xen/i386/mm/highmem.c -40f562383SKvDStdtrvzr5fyCbW4rw linux-2.6.10-xen-sparse/arch/xen/i386/mm/hypervisor.c -40f56239xcNylAxuGsQHwi1AyMLV8w linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c -41062ab7CjxC1UBaFhOMWWdhHkIUyg linux-2.6.10-xen-sparse/arch/xen/i386/mm/ioremap.c -413b5ab8LIowAnQrEmaOJSdmqm96jQ linux-2.6.10-xen-sparse/arch/xen/i386/mm/pageattr.c -40f5623906UYHv1rsVUeRc0tFT0dWw linux-2.6.10-xen-sparse/arch/xen/i386/mm/pgtable.c -4107adf12ndy94MidCaivDibJ3pPAg linux-2.6.10-xen-sparse/arch/xen/i386/pci/Makefile -4107adf1WcCgkhsdLTRGX52cOG1vJg linux-2.6.10-xen-sparse/arch/xen/i386/pci/direct.c -4107adf1s5u6249DNPUViX1YNagbUQ linux-2.6.10-xen-sparse/arch/xen/i386/pci/irq.c -40f56239zOksGg_H4XD4ye6iZNtoZA linux-2.6.10-xen-sparse/arch/xen/kernel/Makefile -40f56239bvOjuuuViZ0XMlNiREFC0A linux-2.6.10-xen-sparse/arch/xen/kernel/ctrl_if.c -41ab6fa06JdF7jxUsuDcjN3UhuIAxg linux-2.6.10-xen-sparse/arch/xen/kernel/devmem.c -40f56238xFQe9T7M_U_FItM-bZIpLw linux-2.6.10-xen-sparse/arch/xen/kernel/evtchn.c -4110f478aeQWllIN7J4kouAHiAqrPw linux-2.6.10-xen-sparse/arch/xen/kernel/fixup.c -412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.10-xen-sparse/arch/xen/kernel/gnttab.c -40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.10-xen-sparse/arch/xen/kernel/reboot.c 
-414c113396tK1HTVeUalm3u-1DF16g linux-2.6.10-xen-sparse/arch/xen/kernel/skbuff.c -3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.10-xen-sparse/arch/xen/kernel/xen_proc.c -41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.10-xen-sparse/drivers/Makefile -4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.10-xen-sparse/drivers/char/mem.c -4111308bZAIzwf_Kzu6x1TZYZ3E0_Q linux-2.6.10-xen-sparse/drivers/char/tty_io.c -40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.10-xen-sparse/drivers/xen/Makefile -41768fbcncpBQf8s2l2-CwoSNIZ9uA linux-2.6.10-xen-sparse/drivers/xen/balloon/Makefile -3e6377f8i5e9eGz7Pw6fQuhuTQ7DQg linux-2.6.10-xen-sparse/drivers/xen/balloon/balloon.c -410d0893otFGghmv4dUXDUBBdY5aIA linux-2.6.10-xen-sparse/drivers/xen/blkback/Makefile -4087cf0d1XgMkooTZAiJS6NrcpLQNQ linux-2.6.10-xen-sparse/drivers/xen/blkback/blkback.c -4087cf0dZadZ8r6CEt4fNN350Yle3A linux-2.6.10-xen-sparse/drivers/xen/blkback/common.h -4087cf0dxlh29iw0w-9rxOCEGCjPcw linux-2.6.10-xen-sparse/drivers/xen/blkback/control.c -4087cf0dbuoH20fMjNZjcgrRK-1msQ linux-2.6.10-xen-sparse/drivers/xen/blkback/interface.c -4087cf0dk97tacDzxfByWV7JifUYqA linux-2.6.10-xen-sparse/drivers/xen/blkback/vbd.c -40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.10-xen-sparse/drivers/xen/blkfront/Kconfig -40f562395atl9x4suKGhPkjqLOXESg linux-2.6.10-xen-sparse/drivers/xen/blkfront/Makefile -40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c -40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.10-xen-sparse/drivers/xen/blkfront/block.h -40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.10-xen-sparse/drivers/xen/blkfront/vbd.c -40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.10-xen-sparse/drivers/xen/console/Makefile -3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.10-xen-sparse/drivers/xen/console/console.c -40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.10-xen-sparse/drivers/xen/evtchn/Makefile -40f56239DoibTX6R-ZYd3QTXAB8_TA linux-2.6.10-xen-sparse/drivers/xen/evtchn/evtchn.c -410a9817HEVJvred5Oy_uKH3HFJC5Q 
linux-2.6.10-xen-sparse/drivers/xen/netback/Makefile -4097ba831lpGeLlPg-bfV8XarVVuoQ linux-2.6.10-xen-sparse/drivers/xen/netback/common.h -4097ba83wvv8yi5P5xugCUBAdb6O-A linux-2.6.10-xen-sparse/drivers/xen/netback/control.c -4097ba83byY5bTSugJGZ1exTxIcMKw linux-2.6.10-xen-sparse/drivers/xen/netback/interface.c -4087cf0dGmSbFhFZyIZBJzvqxY-qBw linux-2.6.10-xen-sparse/drivers/xen/netback/netback.c -40f56239lrg_Ob0BJ8WBFS1zeg2CYw linux-2.6.10-xen-sparse/drivers/xen/netfront/Kconfig -40f56239Wd4k_ycG_mFsSO1r5xKdtQ linux-2.6.10-xen-sparse/drivers/xen/netfront/Makefile -405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.10-xen-sparse/drivers/xen/netfront/netfront.c -4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.10-xen-sparse/drivers/xen/privcmd/Makefile -3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c -412f47e4RKD-R5IS5gEXvcT8L4v8gA linux-2.6.10-xen-sparse/include/asm-generic/pgtable.h -40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/desc.h -4107adf1E5O4ztGHNGMzCCNhcvqNow linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h -40f5623akIoBsQ3KxSB2kufkbgONXQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/fixmap.h -41979925z1MsKU1SfuuheM1IFDQ_bA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/floppy.h -4118b6a418gnL6AZsTdglC92YGqYTg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/highmem.h -40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/io.h -40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h -40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h -40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h -4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mmu_context.h -40f5623aFTyFTR-vdiA-KaGxk5JOKQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/msr.h -40f5623adgjZq9nAgCt0IXdWl7udSA 
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h -40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/param.h -41137cc1kkvg0cg7uxddcEfjL7L67w linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pci.h -40f5623atCokYc2uCysSJ8jFO8TEsw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgalloc.h -412e01beTwiaC8sYY4XJP8PxLST5CA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h -40f5623aEToIXouJgO-ao5d5pcEt1w linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h -40f5623aCCXRPlGpNthVXstGz9ZV3A linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable.h -40f5623aPCkQQfPtJSooGdhcatrvnQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/processor.h -412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/ptrace.h -40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/segment.h -40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/setup.h -40f5623bgzm_9vwxpzJswlAxg298Gg linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h -40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h -40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/tlbflush.h -41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/vga.h -40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h -41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.10-xen-sparse/include/asm-xen/balloon.h -40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.10-xen-sparse/include/asm-xen/ctrl_if.h -40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.10-xen-sparse/include/asm-xen/evtchn.h -419b4e9367PjTEvdjwavWN12BeBBXg linux-2.6.10-xen-sparse/include/asm-xen/foreign_page.h -412dfaeazclyNDM0cpnp60Yo4xulpQ linux-2.6.10-xen-sparse/include/asm-xen/gnttab.h -40f5623aGPlsm0u1LTO-NVZ6AGzNRQ linux-2.6.10-xen-sparse/include/asm-xen/hypervisor.h -3f108af1ylCIm82H052FVTfXACBHrw 
linux-2.6.10-xen-sparse/include/asm-xen/linux-public/privcmd.h -3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.10-xen-sparse/include/asm-xen/linux-public/suspend.h -40f5623cndVUFlkxpf7Lfx7xu8madQ linux-2.6.10-xen-sparse/include/asm-xen/multicall.h -4122466356eIBnC9ot44WSVVIFyhQA linux-2.6.10-xen-sparse/include/asm-xen/queues.h -3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.10-xen-sparse/include/asm-xen/xen_proc.h -419b4e93z2S0gR17XTy8wg09JEwAhg linux-2.6.10-xen-sparse/include/linux/gfp.h -419dfc609zbti8rqL60tL2dHXQ_rvQ linux-2.6.10-xen-sparse/include/linux/irq.h -4124f66f4NaKNa0xPiGGykn9QaZk3w linux-2.6.10-xen-sparse/include/linux/skbuff.h -419dfc6awx7w88wk6cG9P3mPidX6LQ linux-2.6.10-xen-sparse/kernel/irq/manage.c -40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.10-xen-sparse/mkbuildtree -412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.10-xen-sparse/mm/memory.c -410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.10-xen-sparse/mm/page_alloc.c -41505c572m-s9ATiO1LiD1GPznTTIg linux-2.6.10-xen-sparse/net/core/skbuff.c +40f562372u3A7_kfbYYixPHJJxYUxA linux-2.6.11-xen-sparse/arch/xen/Kconfig +40f56237utH41NPukqHksuNf29IC9A linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers +40f56237penAAlWVBVDpeQZNFIg8CA linux-2.6.11-xen-sparse/arch/xen/Makefile +40f56237JTc60m1FRlUxkUaGSQKrNw linux-2.6.11-xen-sparse/arch/xen/boot/Makefile +40f56237hRxbacU_3PdoAl6DjZ3Jnw linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig +40f56237wubfjJKlfIzZlI3ZM2VgGA linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig +40f56237Mta0yHNaMS_qtM2rge0qYA linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig +40f56238u2CJdXNpjsZgHBxeVyY-2g linux-2.6.11-xen-sparse/arch/xen/i386/Makefile +40f56238eczveJ86k_4hNxCLRQIF-g linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile +40f56238rXVTJQKbBuXXLH52qEArcg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/Makefile +40f562385s4lr6Zg92gExe7UQ4A76Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c +41ab440bnpxZdWShZrGgM9pPaz5rmA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile 
+41ab440bBKWz-aEOEojU4PAMXe3Ppg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c +40f56238XDtHSijkAFlbv1PT8Bhw_Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S +40f56238bnvciAuyzAiMkdzGErYt1A linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S +40f58a0d31M2EkuPbG94ns_nOi0PVA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c +40faa751_zbZlAmLyQgCXdYekVFdWA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c +40f56238ue3YRsK52HG7iccNzP1AwQ linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c +41d54a76YMCA67S8J-TBT3J62Wx6yA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/microcode.c +4107adf1cNtsuOxOB4T6paAoY2R2PA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c +40f56238a8iOVDEoostsbun_sy2i4g linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c +40f56238YQIJoYG2ehDGEcdTgLmGbg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c +40f56238nWMQg7CKbyTy0KJNvCzbtg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/signal.c +40f56238qVGkpO_ycnQA8k03kQzAgA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c +40f56238NzTgeO63RGoxHrW5NQeO3Q linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile +40f56238BMqG5PuSHufpjbvp_helBw linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c +40f562389xNa78YBZciUibQjyRU_Lg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c +40f56238JypKAUG01ZojFwH7qnZ5uA linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.S +40f56238wi6AdNQjm0RT57bSkwb6hg linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.lds +40f56238a3w6-byOzexIlMgni76Lcg linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile +40f56238ILx8xlbywNbzTdv5Zr4xXQ linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c +4118cc35CbY8rfGVspF5O-7EkXBEAA linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c +40f562383SKvDStdtrvzr5fyCbW4rw linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c +40f56239xcNylAxuGsQHwi1AyMLV8w linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c +41062ab7CjxC1UBaFhOMWWdhHkIUyg linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c 
+413b5ab8LIowAnQrEmaOJSdmqm96jQ linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c +40f5623906UYHv1rsVUeRc0tFT0dWw linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c +4107adf12ndy94MidCaivDibJ3pPAg linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile +4107adf1WcCgkhsdLTRGX52cOG1vJg linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c +4107adf1s5u6249DNPUViX1YNagbUQ linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c +40f56239zOksGg_H4XD4ye6iZNtoZA linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile +40f56239bvOjuuuViZ0XMlNiREFC0A linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c +41ab6fa06JdF7jxUsuDcjN3UhuIAxg linux-2.6.11-xen-sparse/arch/xen/kernel/devmem.c +40f56238xFQe9T7M_U_FItM-bZIpLw linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c +4110f478aeQWllIN7J4kouAHiAqrPw linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c +412dfae9eA3_6e6bCGUtg1mj8b56fQ linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c +40f562392LBhwmOxVPsYdkYXMxI_ZQ linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c +414c113396tK1HTVeUalm3u-1DF16g linux-2.6.11-xen-sparse/arch/xen/kernel/skbuff.c +3f68905c5eiA-lBMQSvXLMWS1ikDEA linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c +41261688yS8eAyy-7kzG4KBs0xbYCA linux-2.6.11-xen-sparse/drivers/Makefile +4108f5c1WfTIrs0HZFeV39sttekCTw linux-2.6.11-xen-sparse/drivers/char/mem.c +4111308bZAIzwf_Kzu6x1TZYZ3E0_Q linux-2.6.11-xen-sparse/drivers/char/tty_io.c +40f56239Dp_vMTgz8TEbvo1hjHGc3w linux-2.6.11-xen-sparse/drivers/xen/Makefile +41768fbcncpBQf8s2l2-CwoSNIZ9uA linux-2.6.11-xen-sparse/drivers/xen/balloon/Makefile +3e6377f8i5e9eGz7Pw6fQuhuTQ7DQg linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c +410d0893otFGghmv4dUXDUBBdY5aIA linux-2.6.11-xen-sparse/drivers/xen/blkback/Makefile +4087cf0d1XgMkooTZAiJS6NrcpLQNQ linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c +4087cf0dZadZ8r6CEt4fNN350Yle3A linux-2.6.11-xen-sparse/drivers/xen/blkback/common.h +4087cf0dxlh29iw0w-9rxOCEGCjPcw linux-2.6.11-xen-sparse/drivers/xen/blkback/control.c +4087cf0dbuoH20fMjNZjcgrRK-1msQ 
linux-2.6.11-xen-sparse/drivers/xen/blkback/interface.c +4087cf0dk97tacDzxfByWV7JifUYqA linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c +40f56239Sfle6wGv5FS0wjS_HI150A linux-2.6.11-xen-sparse/drivers/xen/blkfront/Kconfig +40f562395atl9x4suKGhPkjqLOXESg linux-2.6.11-xen-sparse/drivers/xen/blkfront/Makefile +40f56239-JNIaTzlviVJohVdoYOUpw linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c +40f56239y9naBTXe40Pi2J_z3p-d1g linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h +40f56239BVfPsXBiWQitXgDRtOsiqg linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c +40f56239fsLjvtD8YBRAWphps4FDjg linux-2.6.11-xen-sparse/drivers/xen/console/Makefile +3e5a4e651TH-SXHoufurnWjgl5bfOA linux-2.6.11-xen-sparse/drivers/xen/console/console.c +40f56239KYxO0YabhPzCTeUuln-lnA linux-2.6.11-xen-sparse/drivers/xen/evtchn/Makefile +40f56239DoibTX6R-ZYd3QTXAB8_TA linux-2.6.11-xen-sparse/drivers/xen/evtchn/evtchn.c +410a9817HEVJvred5Oy_uKH3HFJC5Q linux-2.6.11-xen-sparse/drivers/xen/netback/Makefile +4097ba831lpGeLlPg-bfV8XarVVuoQ linux-2.6.11-xen-sparse/drivers/xen/netback/common.h +4097ba83wvv8yi5P5xugCUBAdb6O-A linux-2.6.11-xen-sparse/drivers/xen/netback/control.c +4097ba83byY5bTSugJGZ1exTxIcMKw linux-2.6.11-xen-sparse/drivers/xen/netback/interface.c +4087cf0dGmSbFhFZyIZBJzvqxY-qBw linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c +40f56239lrg_Ob0BJ8WBFS1zeg2CYw linux-2.6.11-xen-sparse/drivers/xen/netfront/Kconfig +40f56239Wd4k_ycG_mFsSO1r5xKdtQ linux-2.6.11-xen-sparse/drivers/xen/netfront/Makefile +405853f6nbeazrNyEWNHBuoSg2PiPA linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c +4108f5c1ppFXVpQzCOAZ6xXYubsjKA linux-2.6.11-xen-sparse/drivers/xen/privcmd/Makefile +3e5a4e65IUfzzMu2kZFlGEB8-rpTaA linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c +412f47e4RKD-R5IS5gEXvcT8L4v8gA linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h +40f56239YAjS52QG2FIAQpHDZAdGHg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h +4107adf1E5O4ztGHNGMzCCNhcvqNow 
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h +40f5623akIoBsQ3KxSB2kufkbgONXQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h +41979925z1MsKU1SfuuheM1IFDQ_bA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/floppy.h +4118b6a418gnL6AZsTdglC92YGqYTg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h +40f5623aJVXQwpJMOLE99XgvGsfQ8Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io.h +40f5623aKXkBBxgpLx2NcvkncQ1Yyw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h +40f5623aDMCsWOFO0jktZ4e8sjwvEg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h +40f5623arsFXkGdPvIqvFi3yFXGR0Q linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h +4120f807GCO0uqsLqdZj9csxR1Wthw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h +40f5623aFTyFTR-vdiA-KaGxk5JOKQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/msr.h +40f5623adgjZq9nAgCt0IXdWl7udSA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h +40f5623a54NuG-7qHihGYmw4wWQnMA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/param.h +41137cc1kkvg0cg7uxddcEfjL7L67w linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pci.h +40f5623atCokYc2uCysSJ8jFO8TEsw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h +412e01beTwiaC8sYY4XJP8PxLST5CA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h +40f5623aEToIXouJgO-ao5d5pcEt1w linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h +40f5623aCCXRPlGpNthVXstGz9ZV3A linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h +40f5623aPCkQQfPtJSooGdhcatrvnQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h +412ea0afQL2CAI-f522TbLjLPMibPQ linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/ptrace.h +40f5623bzLvxr7WoJIxVf2OH4rCBJg linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h +40f5623bG_LzgG6-qwk292nTc5Wabw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/setup.h +40f5623bgzm_9vwxpzJswlAxg298Gg 
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h +40f5623bVdKP7Dt7qm8twu3NcnGNbA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h +40f5623bc8LKPRO09wY5dGDnY_YCpw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h +41062ab7uFxnCq-KtPeAm-aV8CicgA linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/vga.h +40f5623bxUbeGjkRrjDguCy_Gm8RLw linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/xor.h +41af4017PDMuSmMWtSRU5UC9Vylw5g linux-2.6.11-xen-sparse/include/asm-xen/balloon.h +40f5623bYNP7tHE2zX6YQxp9Zq2utQ linux-2.6.11-xen-sparse/include/asm-xen/ctrl_if.h +40f5623b3Eqs8pAc5WpPX8_jTzV2qw linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h +419b4e9367PjTEvdjwavWN12BeBBXg linux-2.6.11-xen-sparse/include/asm-xen/foreign_page.h +412dfaeazclyNDM0cpnp60Yo4xulpQ linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h +40f5623aGPlsm0u1LTO-NVZ6AGzNRQ linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h +3f108af1ylCIm82H052FVTfXACBHrw linux-2.6.11-xen-sparse/include/asm-xen/linux-public/privcmd.h +3fa8e3f0kBLeE4To2vpdi3cpJbIkbQ linux-2.6.11-xen-sparse/include/asm-xen/linux-public/suspend.h +40f5623cndVUFlkxpf7Lfx7xu8madQ linux-2.6.11-xen-sparse/include/asm-xen/multicall.h +4122466356eIBnC9ot44WSVVIFyhQA linux-2.6.11-xen-sparse/include/asm-xen/queues.h +3f689063BoW-HWV3auUJ-OqXfcGArw linux-2.6.11-xen-sparse/include/asm-xen/xen_proc.h +419b4e93z2S0gR17XTy8wg09JEwAhg linux-2.6.11-xen-sparse/include/linux/gfp.h +419dfc609zbti8rqL60tL2dHXQ_rvQ linux-2.6.11-xen-sparse/include/linux/irq.h +419dfc6awx7w88wk6cG9P3mPidX6LQ linux-2.6.11-xen-sparse/kernel/irq/manage.c +40f56a0ddHCSs3501MY4hRf22tctOw linux-2.6.11-xen-sparse/mkbuildtree +412f46c0LJuKAgSPGoC0Z1DEkLfuLA linux-2.6.11-xen-sparse/mm/memory.c +410a94a4KT6I6X0LVc7djB39tRDp4g linux-2.6.11-xen-sparse/mm/page_alloc.c 413cb1e4zst25MDYjg63Y-NGC5_pLg netbsd-2.0-xen-sparse/Makefile 413cb1e5c_Mkxf_X0zimEhTKI_l4DA netbsd-2.0-xen-sparse/mkbuildtree 413cb1e5kY_Zil7-b0kI6hvCIxBEYg netbsd-2.0-xen-sparse/nbconfig-xen diff --git 
a/linux-2.6.10-xen-sparse/include/linux/skbuff.h b/linux-2.6.10-xen-sparse/include/linux/skbuff.h deleted file mode 100644 index 57a2843faa..0000000000 --- a/linux-2.6.10-xen-sparse/include/linux/skbuff.h +++ /dev/null @@ -1,1189 +0,0 @@ -/* - * Definitions for the 'struct sk_buff' memory handlers. - * - * Authors: - * Alan Cox, - * Florian La Roche, - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#ifndef _LINUX_SKBUFF_H -#define _LINUX_SKBUFF_H - -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -#define HAVE_ALLOC_SKB /* For the drivers to know */ -#define HAVE_ALIGNABLE_SKB /* Ditto 8) */ -#define SLAB_SKB /* Slabified skbuffs */ - -#define CHECKSUM_NONE 0 -#define CHECKSUM_HW 1 -#define CHECKSUM_UNNECESSARY 2 - -#define SKB_DATA_ALIGN(X) (((X) + (SMP_CACHE_BYTES - 1)) & \ - ~(SMP_CACHE_BYTES - 1)) -#define SKB_MAX_ORDER(X, ORDER) (((PAGE_SIZE << (ORDER)) - (X) - \ - sizeof(struct skb_shared_info)) & \ - ~(SMP_CACHE_BYTES - 1)) -#define SKB_MAX_HEAD(X) (SKB_MAX_ORDER((X), 0)) -#define SKB_MAX_ALLOC (SKB_MAX_ORDER(0, 2)) - -/* A. Checksumming of received packets by device. - * - * NONE: device failed to checksum this packet. - * skb->csum is undefined. - * - * UNNECESSARY: device parsed packet and wouldbe verified checksum. - * skb->csum is undefined. - * It is bad option, but, unfortunately, many of vendors do this. - * Apparently with secret goal to sell you new device, when you - * will add new protocol to your host. F.e. IPv6. 8) - * - * HW: the most generic way. Device supplied checksum of _all_ - * the packet as seen by netif_rx in skb->csum. 
- * NOTE: Even if device supports only some protocols, but - * is able to produce some skb->csum, it MUST use HW, - * not UNNECESSARY. - * - * B. Checksumming on output. - * - * NONE: skb is checksummed by protocol or csum is not required. - * - * HW: device is required to csum packet as seen by hard_start_xmit - * from skb->h.raw to the end and to record the checksum - * at skb->h.raw+skb->csum. - * - * Device must show its capabilities in dev->features, set - * at device setup time. - * NETIF_F_HW_CSUM - it is clever device, it is able to checksum - * everything. - * NETIF_F_NO_CSUM - loopback or reliable single hop media. - * NETIF_F_IP_CSUM - device is dumb. It is able to csum only - * TCP/UDP over IPv4. Sigh. Vendors like this - * way by an unknown reason. Though, see comment above - * about CHECKSUM_UNNECESSARY. 8) - * - * Any questions? No questions, good. --ANK - */ - -#ifdef __i386__ -#define NET_CALLER(arg) (*(((void **)&arg) - 1)) -#else -#define NET_CALLER(arg) __builtin_return_address(0) -#endif - -struct net_device; - -#ifdef CONFIG_NETFILTER -struct nf_conntrack { - atomic_t use; - void (*destroy)(struct nf_conntrack *); -}; - -#ifdef CONFIG_BRIDGE_NETFILTER -struct nf_bridge_info { - atomic_t use; - struct net_device *physindev; - struct net_device *physoutdev; -#if defined(CONFIG_VLAN_8021Q) || defined(CONFIG_VLAN_8021Q_MODULE) - struct net_device *netoutdev; -#endif - unsigned int mask; - unsigned long data[32 / sizeof(unsigned long)]; -}; -#endif - -#endif - -struct sk_buff_head { - /* These two members must be first. 
*/ - struct sk_buff *next; - struct sk_buff *prev; - - __u32 qlen; - spinlock_t lock; -}; - -struct sk_buff; - -/* To allow 64K frame to be packed as single skb without frag_list */ -#define MAX_SKB_FRAGS (65536/PAGE_SIZE + 2) - -typedef struct skb_frag_struct skb_frag_t; - -struct skb_frag_struct { - struct page *page; - __u16 page_offset; - __u16 size; -}; - -/* This data is invariant across clones and lives at - * the end of the header data, ie. at skb->end. - */ -struct skb_shared_info { - atomic_t dataref; - unsigned int nr_frags; - unsigned short tso_size; - unsigned short tso_segs; - struct sk_buff *frag_list; - skb_frag_t frags[MAX_SKB_FRAGS]; -}; - -/** - * struct sk_buff - socket buffer - * @next: Next buffer in list - * @prev: Previous buffer in list - * @list: List we are on - * @sk: Socket we are owned by - * @stamp: Time we arrived - * @dev: Device we arrived on/are leaving by - * @input_dev: Device we arrived on - * @real_dev: The real device we are using - * @h: Transport layer header - * @nh: Network layer header - * @mac: Link layer header - * @dst: FIXME: Describe this field - * @cb: Control buffer. Free for use by every layer. 
Put private vars here - * @len: Length of actual data - * @data_len: Data length - * @mac_len: Length of link layer header - * @csum: Checksum - * @__unused: Dead field, may be reused - * @cloned: Head may be cloned (check refcnt to be sure) - * @pkt_type: Packet class - * @ip_summed: Driver fed us an IP checksum - * @priority: Packet queueing priority - * @users: User count - see {datagram,tcp}.c - * @protocol: Packet protocol from driver - * @security: Security level of packet - * @truesize: Buffer size - * @head: Head of buffer - * @data: Data head pointer - * @tail: Tail pointer - * @end: End pointer - * @destructor: Destruct function - * @nfmark: Can be used for communication between hooks - * @nfcache: Cache info - * @nfct: Associated connection, if any - * @nfctinfo: Relationship of this skb to the connection - * @nf_debug: Netfilter debugging - * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c - * @private: Data which is private to the HIPPI implementation - * @tc_index: Traffic control index - */ - -struct sk_buff { - /* These two members must be first. */ - struct sk_buff *next; - struct sk_buff *prev; - - struct sk_buff_head *list; - struct sock *sk; - struct timeval stamp; - struct net_device *dev; - struct net_device *input_dev; - struct net_device *real_dev; - - union { - struct tcphdr *th; - struct udphdr *uh; - struct icmphdr *icmph; - struct igmphdr *igmph; - struct iphdr *ipiph; - struct ipv6hdr *ipv6h; - unsigned char *raw; - } h; - - union { - struct iphdr *iph; - struct ipv6hdr *ipv6h; - struct arphdr *arph; - unsigned char *raw; - } nh; - - union { - unsigned char *raw; - } mac; - - struct dst_entry *dst; - struct sec_path *sp; - - /* - * This is the control buffer. It is free to use for every - * layer. Please put your private variables there. If you - * want to keep them across layers you have to do a skb_clone() - * first. This is owned by whoever has the skb queued ATM. 
- */ - char cb[40]; - - unsigned int len, - data_len, - mac_len, - csum; - unsigned char local_df, - cloned, - pkt_type, - ip_summed; - __u32 priority; - unsigned short protocol, - security; - - void (*destructor)(struct sk_buff *skb); -#ifdef CONFIG_NETFILTER - unsigned long nfmark; - __u32 nfcache; - __u32 nfctinfo; - struct nf_conntrack *nfct; -#ifdef CONFIG_NETFILTER_DEBUG - unsigned int nf_debug; -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - struct nf_bridge_info *nf_bridge; -#endif -#endif /* CONFIG_NETFILTER */ -#if defined(CONFIG_HIPPI) - union { - __u32 ifield; - } private; -#endif -#ifdef CONFIG_NET_SCHED - __u32 tc_index; /* traffic control index */ -#ifdef CONFIG_NET_CLS_ACT - __u32 tc_verd; /* traffic control verdict */ - __u32 tc_classid; /* traffic control classid */ -#endif - -#endif - - - /* These elements must be at the end, see alloc_skb() for details. */ - unsigned int truesize; - atomic_t users; - unsigned char *head, - *data, - *tail, - *end; -}; - -#ifdef __KERNEL__ -/* - * Handling routines are only of interest to the kernel - */ -#include - -#include - -extern void __kfree_skb(struct sk_buff *skb); -extern struct sk_buff *alloc_skb(unsigned int size, int priority); -extern struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int priority); -extern void kfree_skbmem(struct sk_buff *skb); -extern struct sk_buff *skb_clone(struct sk_buff *skb, int priority); -extern struct sk_buff *skb_copy(const struct sk_buff *skb, int priority); -extern struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask); -extern int pskb_expand_head(struct sk_buff *skb, - int nhead, int ntail, int gfp_mask); -extern struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, - unsigned int headroom); -extern struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, - int priority); -extern struct sk_buff * skb_pad(struct sk_buff *skb, int pad); -#define dev_kfree_skb(a) kfree_skb(a) -extern void 
skb_over_panic(struct sk_buff *skb, int len, - void *here); -extern void skb_under_panic(struct sk_buff *skb, int len, - void *here); - -/* Internal */ -#define skb_shinfo(SKB) ((struct skb_shared_info *)((SKB)->end)) - -/** - * skb_queue_empty - check if a queue is empty - * @list: queue head - * - * Returns true if the queue is empty, false otherwise. - */ -static inline int skb_queue_empty(const struct sk_buff_head *list) -{ - return list->next == (struct sk_buff *)list; -} - -/** - * skb_get - reference buffer - * @skb: buffer to reference - * - * Makes another reference to a socket buffer and returns a pointer - * to the buffer. - */ -static inline struct sk_buff *skb_get(struct sk_buff *skb) -{ - atomic_inc(&skb->users); - return skb; -} - -/* - * If users == 1, we are the only owner and are can avoid redundant - * atomic change. - */ - -/** - * kfree_skb - free an sk_buff - * @skb: buffer to free - * - * Drop a reference to the buffer and free it if the usage count has - * hit zero. - */ -static inline void kfree_skb(struct sk_buff *skb) -{ - if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) - __kfree_skb(skb); -} - -/* Use this if you didn't touch the skb state [for fast switching] */ -static inline void kfree_skb_fast(struct sk_buff *skb) -{ - if (atomic_read(&skb->users) == 1 || atomic_dec_and_test(&skb->users)) - kfree_skbmem(skb); -} - -/** - * skb_cloned - is the buffer a clone - * @skb: buffer to check - * - * Returns true if the buffer was generated with skb_clone() and is - * one of multiple shared copies of the buffer. Cloned buffers are - * shared data so must not be written to under normal circumstances. - */ -static inline int skb_cloned(const struct sk_buff *skb) -{ - return skb->cloned && atomic_read(&skb_shinfo(skb)->dataref) != 1; -} - -/** - * skb_shared - is the buffer shared - * @skb: buffer to check - * - * Returns true if more than one person has a reference to this - * buffer. 
- */ -static inline int skb_shared(const struct sk_buff *skb) -{ - return atomic_read(&skb->users) != 1; -} - -/** - * skb_share_check - check if buffer is shared and if so clone it - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the buffer is shared the buffer is cloned and the old copy - * drops a reference. A new clone with a single reference is returned. - * If the buffer is not shared the original buffer is returned. When - * being called from interrupt status or with spinlocks held pri must - * be GFP_ATOMIC. - * - * NULL is returned on a memory allocation failure. - */ -static inline struct sk_buff *skb_share_check(struct sk_buff *skb, int pri) -{ - might_sleep_if(pri & __GFP_WAIT); - if (skb_shared(skb)) { - struct sk_buff *nskb = skb_clone(skb, pri); - kfree_skb(skb); - skb = nskb; - } - return skb; -} - -/* - * Copy shared buffers into a new sk_buff. We effectively do COW on - * packets to handle cases where we have a local reader and forward - * and a couple of other messy ones. The normal one is tcpdumping - * a packet thats being forwarded. - */ - -/** - * skb_unshare - make a copy of a shared buffer - * @skb: buffer to check - * @pri: priority for memory allocation - * - * If the socket buffer is a clone then this function creates a new - * copy of the data, drops a reference count on the old copy and returns - * the new copy with the reference count at 1. If the buffer is not a clone - * the original buffer is returned. When called with a spinlock held or - * from interrupt state @pri must be %GFP_ATOMIC - * - * %NULL is returned on a memory allocation failure. - */ -static inline struct sk_buff *skb_unshare(struct sk_buff *skb, int pri) -{ - might_sleep_if(pri & __GFP_WAIT); - if (skb_cloned(skb)) { - struct sk_buff *nskb = skb_copy(skb, pri); - kfree_skb(skb); /* Free our shared copy */ - skb = nskb; - } - return skb; -} - -/** - * skb_peek - * @list_: list to peek at - * - * Peek an &sk_buff. 
Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the head element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ -static inline struct sk_buff *skb_peek(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->next; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_peek_tail - * @list_: list to peek at - * - * Peek an &sk_buff. Unlike most other operations you _MUST_ - * be careful with this one. A peek leaves the buffer on the - * list and someone else may run off with it. You must hold - * the appropriate locks or have a private queue to do this. - * - * Returns %NULL for an empty list or a pointer to the tail element. - * The reference count is not incremented and the reference is therefore - * volatile. Use with caution. - */ -static inline struct sk_buff *skb_peek_tail(struct sk_buff_head *list_) -{ - struct sk_buff *list = ((struct sk_buff *)list_)->prev; - if (list == (struct sk_buff *)list_) - list = NULL; - return list; -} - -/** - * skb_queue_len - get queue length - * @list_: list to measure - * - * Return the length of an &sk_buff queue. - */ -static inline __u32 skb_queue_len(const struct sk_buff_head *list_) -{ - return list_->qlen; -} - -static inline void skb_queue_head_init(struct sk_buff_head *list) -{ - spin_lock_init(&list->lock); - list->prev = list->next = (struct sk_buff *)list; - list->qlen = 0; -} - -/* - * Insert an sk_buff at the start of a list. - * - * The "__skb_xxxx()" functions are the non-atomic ones that - * can only be called with interrupts disabled. 
- */ - -/** - * __skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ -extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); -static inline void __skb_queue_head(struct sk_buff_head *list, - struct sk_buff *newsk) -{ - struct sk_buff *prev, *next; - - newsk->list = list; - list->qlen++; - prev = (struct sk_buff *)list; - next = prev->next; - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; -} - -/** - * __skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the end of a list. This function takes no locks - * and you must therefore hold required locks before calling it. - * - * A buffer cannot be placed on two lists at the same time. - */ -extern void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk); -static inline void __skb_queue_tail(struct sk_buff_head *list, - struct sk_buff *newsk) -{ - struct sk_buff *prev, *next; - - newsk->list = list; - list->qlen++; - next = (struct sk_buff *)list; - prev = next->prev; - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; -} - - -/** - * __skb_dequeue - remove from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. This function does not take any locks - * so must be used with appropriate locks held only. The head item is - * returned or %NULL if the list is empty. 
- */ -extern struct sk_buff *skb_dequeue(struct sk_buff_head *list); -static inline struct sk_buff *__skb_dequeue(struct sk_buff_head *list) -{ - struct sk_buff *next, *prev, *result; - - prev = (struct sk_buff *) list; - next = prev->next; - result = NULL; - if (next != prev) { - result = next; - next = next->next; - list->qlen--; - next->prev = prev; - prev->next = next; - result->next = result->prev = NULL; - result->list = NULL; - } - return result; -} - - -/* - * Insert a packet on a list. - */ -extern void skb_insert(struct sk_buff *old, struct sk_buff *newsk); -static inline void __skb_insert(struct sk_buff *newsk, - struct sk_buff *prev, struct sk_buff *next, - struct sk_buff_head *list) -{ - newsk->next = next; - newsk->prev = prev; - next->prev = prev->next = newsk; - newsk->list = list; - list->qlen++; -} - -/* - * Place a packet after a given packet in a list. - */ -extern void skb_append(struct sk_buff *old, struct sk_buff *newsk); -static inline void __skb_append(struct sk_buff *old, struct sk_buff *newsk) -{ - __skb_insert(newsk, old, old->next, old->list); -} - -/* - * remove sk_buff from list. _Must_ be called atomically, and with - * the list known.. - */ -extern void skb_unlink(struct sk_buff *skb); -static inline void __skb_unlink(struct sk_buff *skb, struct sk_buff_head *list) -{ - struct sk_buff *next, *prev; - - list->qlen--; - next = skb->next; - prev = skb->prev; - skb->next = skb->prev = NULL; - skb->list = NULL; - next->prev = prev; - prev->next = next; -} - - -/* XXX: more streamlined implementation */ - -/** - * __skb_dequeue_tail - remove from the tail of the queue - * @list: list to dequeue from - * - * Remove the tail of the list. This function does not take any locks - * so must be used with appropriate locks held only. The tail item is - * returned or %NULL if the list is empty. 
- */ -extern struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list); -static inline struct sk_buff *__skb_dequeue_tail(struct sk_buff_head *list) -{ - struct sk_buff *skb = skb_peek_tail(list); - if (skb) - __skb_unlink(skb, list); - return skb; -} - - -static inline int skb_is_nonlinear(const struct sk_buff *skb) -{ - return skb->data_len; -} - -static inline unsigned int skb_headlen(const struct sk_buff *skb) -{ - return skb->len - skb->data_len; -} - -static inline int skb_pagelen(const struct sk_buff *skb) -{ - int i, len = 0; - - for (i = (int)skb_shinfo(skb)->nr_frags - 1; i >= 0; i--) - len += skb_shinfo(skb)->frags[i].size; - return len + skb_headlen(skb); -} - -static inline void skb_fill_page_desc(struct sk_buff *skb, int i, - struct page *page, int off, int size) -{ - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - frag->page = page; - frag->page_offset = off; - frag->size = size; - skb_shinfo(skb)->nr_frags = i + 1; -} - -#define SKB_PAGE_ASSERT(skb) BUG_ON(skb_shinfo(skb)->nr_frags) -#define SKB_FRAG_ASSERT(skb) BUG_ON(skb_shinfo(skb)->frag_list) -#define SKB_LINEAR_ASSERT(skb) BUG_ON(skb_is_nonlinear(skb)) - -/* - * Add data to an sk_buff - */ -static inline unsigned char *__skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp = skb->tail; - SKB_LINEAR_ASSERT(skb); - skb->tail += len; - skb->len += len; - return tmp; -} - -/** - * skb_put - add data to a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer. If this would - * exceed the total buffer size the kernel will panic. A pointer to the - * first byte of the extra data is returned. 
- */ -static inline unsigned char *skb_put(struct sk_buff *skb, unsigned int len) -{ - unsigned char *tmp = skb->tail; - SKB_LINEAR_ASSERT(skb); - skb->tail += len; - skb->len += len; - if (unlikely(skb->tail>skb->end)) - skb_over_panic(skb, len, current_text_addr()); - return tmp; -} - -static inline unsigned char *__skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data -= len; - skb->len += len; - return skb->data; -} - -/** - * skb_push - add data to the start of a buffer - * @skb: buffer to use - * @len: amount of data to add - * - * This function extends the used data area of the buffer at the buffer - * start. If this would exceed the total buffer headroom the kernel will - * panic. A pointer to the first byte of the extra data is returned. - */ -static inline unsigned char *skb_push(struct sk_buff *skb, unsigned int len) -{ - skb->data -= len; - skb->len += len; - if (unlikely(skb->datahead)) - skb_under_panic(skb, len, current_text_addr()); - return skb->data; -} - -static inline unsigned char *__skb_pull(struct sk_buff *skb, unsigned int len) -{ - skb->len -= len; - BUG_ON(skb->len < skb->data_len); - return skb->data += len; -} - -/** - * skb_pull - remove data from the start of a buffer - * @skb: buffer to use - * @len: amount of data to remove - * - * This function removes data from the start of a buffer, returning - * the memory to the headroom. A pointer to the next data in the buffer - * is returned. Once the data has been pulled future pushes will overwrite - * the old data. - */ -static inline unsigned char *skb_pull(struct sk_buff *skb, unsigned int len) -{ - return unlikely(len > skb->len) ? 
NULL : __skb_pull(skb, len); -} - -extern unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta); - -static inline unsigned char *__pskb_pull(struct sk_buff *skb, unsigned int len) -{ - if (len > skb_headlen(skb) && - !__pskb_pull_tail(skb, len-skb_headlen(skb))) - return NULL; - skb->len -= len; - return skb->data += len; -} - -static inline unsigned char *pskb_pull(struct sk_buff *skb, unsigned int len) -{ - return unlikely(len > skb->len) ? NULL : __pskb_pull(skb, len); -} - -static inline int pskb_may_pull(struct sk_buff *skb, unsigned int len) -{ - if (likely(len <= skb_headlen(skb))) - return 1; - if (unlikely(len > skb->len)) - return 0; - return __pskb_pull_tail(skb, len-skb_headlen(skb)) != NULL; -} - -/** - * skb_headroom - bytes at buffer head - * @skb: buffer to check - * - * Return the number of bytes of free space at the head of an &sk_buff. - */ -static inline int skb_headroom(const struct sk_buff *skb) -{ - return skb->data - skb->head; -} - -/** - * skb_tailroom - bytes at buffer end - * @skb: buffer to check - * - * Return the number of bytes of free space at the tail of an sk_buff - */ -static inline int skb_tailroom(const struct sk_buff *skb) -{ - return skb_is_nonlinear(skb) ? 0 : skb->end - skb->tail; -} - -/** - * skb_reserve - adjust headroom - * @skb: buffer to alter - * @len: bytes to move - * - * Increase the headroom of an empty &sk_buff by reducing the tail - * room. This is only allowed for an empty buffer. - */ -static inline void skb_reserve(struct sk_buff *skb, unsigned int len) -{ - skb->data += len; - skb->tail += len; -} - -/* - * CPUs often take a performance hit when accessing unaligned memory - * locations. The actual performance hit varies, it can be small if the - * hardware handles it or large if we have to take an exception and fix it - * in software. - * - * Since an ethernet header is 14 bytes network drivers often end up with - * the IP header at an unaligned offset. 
The IP header can be aligned by - * shifting the start of the packet by 2 bytes. Drivers should do this - * with: - * - * skb_reserve(NET_IP_ALIGN); - * - * The downside to this alignment of the IP header is that the DMA is now - * unaligned. On some architectures the cost of an unaligned DMA is high - * and this cost outweighs the gains made by aligning the IP header. - * - * Since this trade off varies between architectures, we allow NET_IP_ALIGN - * to be overridden. - */ -#ifndef NET_IP_ALIGN -#define NET_IP_ALIGN 2 -#endif - -extern int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc); - -static inline void __skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data + len; - } else - ___pskb_trim(skb, len, 0); -} - -/** - * skb_trim - remove end from a buffer - * @skb: buffer to alter - * @len: new length - * - * Cut the length of a buffer down by removing data from the tail. If - * the buffer is already under the length specified it is not modified. - */ -static inline void skb_trim(struct sk_buff *skb, unsigned int len) -{ - if (skb->len > len) - __skb_trim(skb, len); -} - - -static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) -{ - if (!skb->data_len) { - skb->len = len; - skb->tail = skb->data+len; - return 0; - } - return ___pskb_trim(skb, len, 1); -} - -static inline int pskb_trim(struct sk_buff *skb, unsigned int len) -{ - return (len < skb->len) ? __pskb_trim(skb, len) : 0; -} - -/** - * skb_orphan - orphan a buffer - * @skb: buffer to orphan - * - * If a buffer currently has an owner then we call the owner's - * destructor function and make the @skb unowned. The buffer continues - * to exist but is no longer charged to its former owner. 
- */ -static inline void skb_orphan(struct sk_buff *skb) -{ - if (skb->destructor) - skb->destructor(skb); - skb->destructor = NULL; - skb->sk = NULL; -} - -/** - * __skb_queue_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function does not take the - * list lock and the caller must hold the relevant locks to use it. - */ -extern void skb_queue_purge(struct sk_buff_head *list); -static inline void __skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = __skb_dequeue(list)) != NULL) - kfree_skb(skb); -} - -/** - * __dev_alloc_skb - allocate an skbuff for sending - * @length: length to allocate - * @gfp_mask: get_free_pages mask, passed to alloc_skb - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned in there is no free memory. - */ -#ifndef CONFIG_HAVE_ARCH_DEV_ALLOC_SKB -static inline struct sk_buff *__dev_alloc_skb(unsigned int length, - int gfp_mask) -{ - struct sk_buff *skb = alloc_skb(length + 16, gfp_mask); - if (likely(skb)) - skb_reserve(skb, 16); - return skb; -} -#else -extern struct sk_buff *__dev_alloc_skb(unsigned int length, int gfp_mask); -#endif - -/** - * dev_alloc_skb - allocate an skbuff for sending - * @length: length to allocate - * - * Allocate a new &sk_buff and assign it a usage count of one. The - * buffer has unspecified headroom built in. Users should allocate - * the headroom they think they need without accounting for the - * built in space. The built in space is used for optimisations. - * - * %NULL is returned in there is no free memory. Although this function - * allocates memory it can be called from an interrupt. 
- */ -static inline struct sk_buff *dev_alloc_skb(unsigned int length) -{ - return __dev_alloc_skb(length, GFP_ATOMIC); -} - -/** - * skb_cow - copy header of skb when it is required - * @skb: buffer to cow - * @headroom: needed headroom - * - * If the skb passed lacks sufficient headroom or its data part - * is shared, data is reallocated. If reallocation fails, an error - * is returned and original skb is not changed. - * - * The result is skb with writable area skb->head...skb->tail - * and at least @headroom of space at head. - */ -static inline int skb_cow(struct sk_buff *skb, unsigned int headroom) -{ - int delta = (headroom > 16 ? headroom : 16) - skb_headroom(skb); - - if (delta < 0) - delta = 0; - - if (delta || skb_cloned(skb)) - return pskb_expand_head(skb, (delta + 15) & ~15, 0, GFP_ATOMIC); - return 0; -} - -/** - * skb_padto - pad an skbuff up to a minimal size - * @skb: buffer to pad - * @len: minimal length - * - * Pads up a buffer to ensure the trailing bytes exist and are - * blanked. If the buffer already contains sufficient data it - * is untouched. Returns the buffer, which may be a replacement - * for the original, or NULL for out of memory - in which case - * the original buffer is still freed. 
- */ - -static inline struct sk_buff *skb_padto(struct sk_buff *skb, unsigned int len) -{ - unsigned int size = skb->len; - if (likely(size >= len)) - return skb; - return skb_pad(skb, len-size); -} - -static inline int skb_add_data(struct sk_buff *skb, - char __user *from, int copy) -{ - const int off = skb->len; - - if (skb->ip_summed == CHECKSUM_NONE) { - int err = 0; - unsigned int csum = csum_and_copy_from_user(from, - skb_put(skb, copy), - copy, 0, &err); - if (!err) { - skb->csum = csum_block_add(skb->csum, csum, off); - return 0; - } - } else if (!copy_from_user(skb_put(skb, copy), from, copy)) - return 0; - - __skb_trim(skb, off); - return -EFAULT; -} - -static inline int skb_can_coalesce(struct sk_buff *skb, int i, - struct page *page, int off) -{ - if (i) { - struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[i - 1]; - - return page == frag->page && - off == frag->page_offset + frag->size; - } - return 0; -} - -/** - * skb_linearize - convert paged skb to linear one - * @skb: buffer to linarize - * @gfp: allocation mode - * - * If there is no free memory -ENOMEM is returned, otherwise zero - * is returned and the old skb data released. 
- */ -extern int __skb_linearize(struct sk_buff *skb, int gfp); -static inline int skb_linearize(struct sk_buff *skb, int gfp) -{ - return __skb_linearize(skb, gfp); -} - -static inline void *kmap_skb_frag(const skb_frag_t *frag) -{ -#ifdef CONFIG_HIGHMEM - BUG_ON(in_irq()); - - local_bh_disable(); -#endif - return kmap_atomic(frag->page, KM_SKB_DATA_SOFTIRQ); -} - -static inline void kunmap_skb_frag(void *vaddr) -{ - kunmap_atomic(vaddr, KM_SKB_DATA_SOFTIRQ); -#ifdef CONFIG_HIGHMEM - local_bh_enable(); -#endif -} - -#define skb_queue_walk(queue, skb) \ - for (skb = (queue)->next, prefetch(skb->next); \ - (skb != (struct sk_buff *)(queue)); \ - skb = skb->next, prefetch(skb->next)) - - -extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, - int noblock, int *err); -extern unsigned int datagram_poll(struct file *file, struct socket *sock, - struct poll_table_struct *wait); -extern int skb_copy_datagram(const struct sk_buff *from, - int offset, char __user *to, int size); -extern int skb_copy_datagram_iovec(const struct sk_buff *from, - int offset, struct iovec *to, - int size); -extern int skb_copy_and_csum_datagram(const struct sk_buff *skb, - int offset, u8 __user *to, - int len, unsigned int *csump); -extern int skb_copy_and_csum_datagram_iovec(const - struct sk_buff *skb, - int hlen, - struct iovec *iov); -extern void skb_free_datagram(struct sock *sk, struct sk_buff *skb); -extern unsigned int skb_checksum(const struct sk_buff *skb, int offset, - int len, unsigned int csum); -extern int skb_copy_bits(const struct sk_buff *skb, int offset, - void *to, int len); -extern unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, - int offset, u8 *to, int len, - unsigned int csum); -extern void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to); -extern void skb_split(struct sk_buff *skb, - struct sk_buff *skb1, const u32 len); - -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) -{ 
- int hlen = skb_headlen(skb); - - if (offset + len <= hlen) - return skb->data + offset; - - if (skb_copy_bits(skb, offset, buffer, len) < 0) - return NULL; - - return buffer; -} - -extern void skb_init(void); -extern void skb_add_mtu(int mtu); - -struct skb_iter { - /* Iteration functions set these */ - unsigned char *data; - unsigned int len; - - /* Private to iteration */ - unsigned int nextfrag; - struct sk_buff *fraglist; -}; - -/* Keep iterating until skb_iter_next returns false. */ -extern void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i); -extern int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i); -/* Call this if aborting loop before !skb_iter_next */ -extern void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i); - -#ifdef CONFIG_NETFILTER -static inline void nf_conntrack_put(struct nf_conntrack *nfct) -{ - if (nfct && atomic_dec_and_test(&nfct->use)) - nfct->destroy(nfct); -} -static inline void nf_conntrack_get(struct nf_conntrack *nfct) -{ - if (nfct) - atomic_inc(&nfct->use); -} -static inline void nf_reset(struct sk_buff *skb) -{ - nf_conntrack_put(skb->nfct); - skb->nfct = NULL; -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -} -static inline void nf_reset_debug(struct sk_buff *skb) -{ -#ifdef CONFIG_NETFILTER_DEBUG - skb->nf_debug = 0; -#endif -} - -#ifdef CONFIG_BRIDGE_NETFILTER -static inline void nf_bridge_put(struct nf_bridge_info *nf_bridge) -{ - if (nf_bridge && atomic_dec_and_test(&nf_bridge->use)) - kfree(nf_bridge); -} -static inline void nf_bridge_get(struct nf_bridge_info *nf_bridge) -{ - if (nf_bridge) - atomic_inc(&nf_bridge->use); -} -#endif /* CONFIG_BRIDGE_NETFILTER */ -#else /* CONFIG_NETFILTER */ -static inline void nf_reset(struct sk_buff *skb) {} -#endif /* CONFIG_NETFILTER */ - -#endif /* __KERNEL__ */ -#endif /* _LINUX_SKBUFF_H */ diff --git a/linux-2.6.10-xen-sparse/net/core/skbuff.c b/linux-2.6.10-xen-sparse/net/core/skbuff.c deleted file mode 100644 index 
4e2936caab..0000000000 --- a/linux-2.6.10-xen-sparse/net/core/skbuff.c +++ /dev/null @@ -1,1521 +0,0 @@ -/* - * Routines having to do with the 'struct sk_buff' memory handlers. - * - * Authors: Alan Cox - * Florian La Roche - * - * Version: $Id: skbuff.c,v 1.90 2001/11/07 05:56:19 davem Exp $ - * - * Fixes: - * Alan Cox : Fixed the worst of the load - * balancer bugs. - * Dave Platt : Interrupt stacking fix. - * Richard Kooijman : Timestamp fixes. - * Alan Cox : Changed buffer format. - * Alan Cox : destructor hook for AF_UNIX etc. - * Linus Torvalds : Better skb_clone. - * Alan Cox : Added skb_copy. - * Alan Cox : Added all the changed routines Linus - * only put in the headers - * Ray VanTassle : Fixed --skb->lock in free - * Alan Cox : skb_copy copy arp field - * Andi Kleen : slabified it. - * Robert Olsson : Removed skb_head_pool - * - * NOTE: - * The __skb_ routines should be called with interrupts - * disabled, or you better be *real* sure that the operation is atomic - * with respect to whatever list is being frobbed (e.g. via lock_sock() - * or via disabling bottom half handlers, etc). - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -/* - * The functions in this file will not compile correctly with gcc 2.4.x - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#ifdef CONFIG_NET_CLS_ACT -#include -#endif -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include - -#include -#include - -static kmem_cache_t *skbuff_head_cache; - -/* - * Keep out-of-line to prevent kernel bloat. - * __builtin_return_address is not used because it is not always - * reliable. 
- */ - -/** - * skb_over_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_put(). Not user callable. - */ -void skb_over_panic(struct sk_buff *skb, int sz, void *here) -{ - printk(KERN_INFO "skput:over: %p:%d put:%d dev:%s", - here, skb->len, sz, skb->dev ? skb->dev->name : ""); - BUG(); -} - -/** - * skb_under_panic - private function - * @skb: buffer - * @sz: size - * @here: address - * - * Out of line support code for skb_push(). Not user callable. - */ - -void skb_under_panic(struct sk_buff *skb, int sz, void *here) -{ - printk(KERN_INFO "skput:under: %p:%d put:%d dev:%s", - here, skb->len, sz, skb->dev ? skb->dev->name : ""); - BUG(); -} - -/* Allocate a new skbuff. We do this ourselves so we can fill in a few - * 'private' fields and also do memory statistics to find all the - * [BEEP] leaks. - * - */ - -/** - * alloc_skb - allocate a network buffer - * @size: size to allocate - * @gfp_mask: allocation mask - * - * Allocate a new &sk_buff. The returned buffer has no headroom and a - * tail room of size bytes. The object has a reference count of one. - * The return is the buffer. On a failure the return is %NULL. - * - * Buffers may only be allocated from interrupts using a @gfp_mask of - * %GFP_ATOMIC. - */ -struct sk_buff *alloc_skb(unsigned int size, int gfp_mask) -{ - struct sk_buff *skb; - u8 *data; - - /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); - if (!skb) - goto out; - - /* Get the DATA. Size must match skb_add_mtu(). 
*/ - size = SKB_DATA_ALIGN(size); - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - goto nodata; - - memset(skb, 0, offsetof(struct sk_buff, truesize)); - skb->truesize = size + sizeof(struct sk_buff); - atomic_set(&skb->users, 1); - skb->head = data; - skb->data = data; - skb->tail = data; - skb->end = data + size; - - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; - skb_shinfo(skb)->frag_list = NULL; -out: - return skb; -nodata: - kmem_cache_free(skbuff_head_cache, skb); - skb = NULL; - goto out; -} - -/** - * alloc_skb_from_cache - allocate a network buffer - * @cp: kmem_cache from which to allocate the data area - * (object size must be big enough for @size bytes + skb overheads) - * @size: size to allocate - * @gfp_mask: allocation mask - * - * Allocate a new &sk_buff. The returned buffer has no headroom and a - * tail room of size bytes. The object has a reference count of one. - * The return is the buffer. On a failure the return is %NULL. - * - * Buffers may only be allocated from interrupts using a @gfp_mask of - * %GFP_ATOMIC. - */ -struct sk_buff *alloc_skb_from_cache(kmem_cache_t *cp, - unsigned int size, int gfp_mask) -{ - struct sk_buff *skb; - u8 *data; - - /* Get the HEAD */ - skb = kmem_cache_alloc(skbuff_head_cache, - gfp_mask & ~__GFP_DMA); - if (!skb) - goto out; - - /* Get the DATA. 
*/ - size = SKB_DATA_ALIGN(size); - data = kmem_cache_alloc(cp, gfp_mask); - if (!data) - goto nodata; - - memset(skb, 0, offsetof(struct sk_buff, truesize)); - skb->truesize = size + sizeof(struct sk_buff); - atomic_set(&skb->users, 1); - skb->head = data; - skb->data = data; - skb->tail = data; - skb->end = data + size; - - atomic_set(&(skb_shinfo(skb)->dataref), 1); - skb_shinfo(skb)->nr_frags = 0; - skb_shinfo(skb)->tso_size = 0; - skb_shinfo(skb)->tso_segs = 0; - skb_shinfo(skb)->frag_list = NULL; -out: - return skb; -nodata: - kmem_cache_free(skbuff_head_cache, skb); - skb = NULL; - goto out; -} - - -static void skb_drop_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - skb_shinfo(skb)->frag_list = NULL; - - do { - struct sk_buff *this = list; - list = list->next; - kfree_skb(this); - } while (list); -} - -static void skb_clone_fraglist(struct sk_buff *skb) -{ - struct sk_buff *list; - - for (list = skb_shinfo(skb)->frag_list; list; list = list->next) - skb_get(list); -} - -void skb_release_data(struct sk_buff *skb) -{ - if (!skb->cloned || - atomic_dec_and_test(&(skb_shinfo(skb)->dataref))) { - if (skb_shinfo(skb)->nr_frags) { - int i; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - put_page(skb_shinfo(skb)->frags[i].page); - } - - if (skb_shinfo(skb)->frag_list) - skb_drop_fraglist(skb); - - kfree(skb->head); - } -} - -/* - * Free an skbuff by memory without cleaning the state. - */ -void kfree_skbmem(struct sk_buff *skb) -{ - skb_release_data(skb); - kmem_cache_free(skbuff_head_cache, skb); -} - -/** - * __kfree_skb - private function - * @skb: buffer - * - * Free an sk_buff. Release anything attached to the buffer. - * Clean the state. This is an internal helper function. 
Users should - * always call kfree_skb - */ - -void __kfree_skb(struct sk_buff *skb) -{ - if (skb->list) { - printk(KERN_WARNING "Warning: kfree_skb passed an skb still " - "on a list (from %p).\n", NET_CALLER(skb)); - BUG(); - } - - dst_release(skb->dst); -#ifdef CONFIG_XFRM - secpath_put(skb->sp); -#endif - if(skb->destructor) { - if (in_irq()) - printk(KERN_WARNING "Warning: kfree_skb on " - "hard IRQ %p\n", NET_CALLER(skb)); - skb->destructor(skb); - } -#ifdef CONFIG_NETFILTER - nf_conntrack_put(skb->nfct); -#ifdef CONFIG_BRIDGE_NETFILTER - nf_bridge_put(skb->nf_bridge); -#endif -#endif -/* XXX: IS this still necessary? - JHS */ -#ifdef CONFIG_NET_SCHED - skb->tc_index = 0; -#ifdef CONFIG_NET_CLS_ACT - skb->tc_verd = 0; - skb->tc_classid = 0; -#endif -#endif - - kfree_skbmem(skb); -} - -/** - * skb_clone - duplicate an sk_buff - * @skb: buffer to clone - * @gfp_mask: allocation priority - * - * Duplicate an &sk_buff. The new one is not owned by a socket. Both - * copies share the same packet data but not structure. The new - * buffer has a reference count of 1. If the allocation fails the - * function returns %NULL otherwise the new buffer is returned. - * - * If this function is called from an interrupt gfp_mask() must be - * %GFP_ATOMIC. 
- */ - -struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask) -{ - struct sk_buff *n = kmem_cache_alloc(skbuff_head_cache, gfp_mask); - - if (!n) - return NULL; - -#define C(x) n->x = skb->x - - n->next = n->prev = NULL; - n->list = NULL; - n->sk = NULL; - C(stamp); - C(dev); - C(real_dev); - C(h); - C(nh); - C(mac); - C(dst); - dst_clone(skb->dst); - C(sp); -#ifdef CONFIG_INET - secpath_get(skb->sp); -#endif - memcpy(n->cb, skb->cb, sizeof(skb->cb)); - C(len); - C(data_len); - C(csum); - C(local_df); - n->cloned = 1; - C(pkt_type); - C(ip_summed); - C(priority); - C(protocol); - C(security); - n->destructor = NULL; -#ifdef CONFIG_NETFILTER - C(nfmark); - C(nfcache); - C(nfct); - nf_conntrack_get(skb->nfct); - C(nfctinfo); -#ifdef CONFIG_NETFILTER_DEBUG - C(nf_debug); -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - C(nf_bridge); - nf_bridge_get(skb->nf_bridge); -#endif -#endif /*CONFIG_NETFILTER*/ -#if defined(CONFIG_HIPPI) - C(private); -#endif -#ifdef CONFIG_NET_SCHED - C(tc_index); -#ifdef CONFIG_NET_CLS_ACT - n->tc_verd = SET_TC_VERD(skb->tc_verd,0); - n->tc_verd = CLR_TC_OK2MUNGE(skb->tc_verd); - n->tc_verd = CLR_TC_MUNGED(skb->tc_verd); - C(input_dev); - C(tc_classid); -#endif - -#endif - C(truesize); - atomic_set(&n->users, 1); - C(head); - C(data); - C(tail); - C(end); - - atomic_inc(&(skb_shinfo(skb)->dataref)); - skb->cloned = 1; - - return n; -} - -static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old) -{ - /* - * Shift between the two data areas in bytes - */ - unsigned long offset = new->data - old->data; - - new->list = NULL; - new->sk = NULL; - new->dev = old->dev; - new->real_dev = old->real_dev; - new->priority = old->priority; - new->protocol = old->protocol; - new->dst = dst_clone(old->dst); -#ifdef CONFIG_INET - new->sp = secpath_get(old->sp); -#endif - new->h.raw = old->h.raw + offset; - new->nh.raw = old->nh.raw + offset; - new->mac.raw = old->mac.raw + offset; - memcpy(new->cb, old->cb, sizeof(old->cb)); - new->local_df 
= old->local_df; - new->pkt_type = old->pkt_type; - new->stamp = old->stamp; - new->destructor = NULL; - new->security = old->security; -#ifdef CONFIG_NETFILTER - new->nfmark = old->nfmark; - new->nfcache = old->nfcache; - new->nfct = old->nfct; - nf_conntrack_get(old->nfct); - new->nfctinfo = old->nfctinfo; -#ifdef CONFIG_NETFILTER_DEBUG - new->nf_debug = old->nf_debug; -#endif -#ifdef CONFIG_BRIDGE_NETFILTER - new->nf_bridge = old->nf_bridge; - nf_bridge_get(old->nf_bridge); -#endif -#endif -#ifdef CONFIG_NET_SCHED -#ifdef CONFIG_NET_CLS_ACT - new->tc_verd = old->tc_verd; -#endif - new->tc_index = old->tc_index; -#endif - atomic_set(&new->users, 1); - skb_shinfo(new)->tso_size = skb_shinfo(old)->tso_size; - skb_shinfo(new)->tso_segs = skb_shinfo(old)->tso_segs; -} - -/** - * skb_copy - create private copy of an sk_buff - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data. This is used when the - * caller wishes to modify the data and needs a private copy of the - * data to alter. Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * As by-product this function converts non-linear &sk_buff to linear - * one, so that &sk_buff becomes completely private and caller is allowed - * to modify all the data of returned buffer. This means that this - * function is not recommended for use in circumstances when only - * header is going to be modified. Use pskb_copy() instead. 
- */ - -struct sk_buff *skb_copy(const struct sk_buff *skb, int gfp_mask) -{ - int headerlen = skb->data - skb->head; - /* - * Allocate the copy buffer - */ - struct sk_buff *n = alloc_skb(skb->end - skb->head + skb->data_len, - gfp_mask); - if (!n) - return NULL; - - /* Set the data pointer */ - skb_reserve(n, headerlen); - /* Set the tail pointer and length */ - skb_put(n, skb->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; - - if (skb_copy_bits(skb, -headerlen, n->head, headerlen + skb->len)) - BUG(); - - copy_skb_header(n, skb); - return n; -} - - -/** - * pskb_copy - create copy of an sk_buff with private head. - * @skb: buffer to copy - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and part of its data, located - * in header. Fragmented data remain shared. This is used when - * the caller wishes to modify only header of &sk_buff and needs - * private copy of the header to alter. Returns %NULL on failure - * or the pointer to the buffer on success. - * The returned buffer has a reference count of 1. 
- */ - -struct sk_buff *pskb_copy(struct sk_buff *skb, int gfp_mask) -{ - /* - * Allocate the copy buffer - */ - struct sk_buff *n = alloc_skb(skb->end - skb->head, gfp_mask); - - if (!n) - goto out; - - /* Set the data pointer */ - skb_reserve(n, skb->data - skb->head); - /* Set the tail pointer and length */ - skb_put(n, skb_headlen(skb)); - /* Copy the bytes */ - memcpy(n->data, skb->data, n->len); - n->csum = skb->csum; - n->ip_summed = skb->ip_summed; - - n->data_len = skb->data_len; - n->len = skb->len; - - if (skb_shinfo(skb)->nr_frags) { - int i; - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - skb_shinfo(n)->frags[i] = skb_shinfo(skb)->frags[i]; - get_page(skb_shinfo(n)->frags[i].page); - } - skb_shinfo(n)->nr_frags = i; - } - - if (skb_shinfo(skb)->frag_list) { - skb_shinfo(n)->frag_list = skb_shinfo(skb)->frag_list; - skb_clone_fraglist(n); - } - - copy_skb_header(n, skb); -out: - return n; -} - -/** - * pskb_expand_head - reallocate header of &sk_buff - * @skb: buffer to reallocate - * @nhead: room to add at head - * @ntail: room to add at tail - * @gfp_mask: allocation priority - * - * Expands (or creates identical copy, if &nhead and &ntail are zero) - * header of skb. &sk_buff itself is not changed. &sk_buff MUST have - * reference count of 1. Returns zero in the case of success or error, - * if expansion failed. In the last case, &sk_buff is not changed. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -int pskb_expand_head(struct sk_buff *skb, int nhead, int ntail, int gfp_mask) -{ - int i; - u8 *data; - int size = nhead + (skb->end - skb->head) + ntail; - long off; - - if (skb_shared(skb)) - BUG(); - - size = SKB_DATA_ALIGN(size); - - data = kmalloc(size + sizeof(struct skb_shared_info), gfp_mask); - if (!data) - goto nodata; - - /* Copy only real data... and, alas, header. This should be - * optimized for the cases when header is void. 
*/ - memcpy(data + nhead, skb->head, skb->tail - skb->head); - memcpy(data + size, skb->end, sizeof(struct skb_shared_info)); - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - get_page(skb_shinfo(skb)->frags[i].page); - - if (skb_shinfo(skb)->frag_list) - skb_clone_fraglist(skb); - - skb_release_data(skb); - - off = (data + nhead) - skb->head; - - skb->head = data; - skb->end = data + size; - skb->data += off; - skb->tail += off; - skb->mac.raw += off; - skb->h.raw += off; - skb->nh.raw += off; - skb->cloned = 0; - atomic_set(&skb_shinfo(skb)->dataref, 1); - return 0; - -nodata: - return -ENOMEM; -} - -/* Make private copy of skb with writable head and some headroom */ - -struct sk_buff *skb_realloc_headroom(struct sk_buff *skb, unsigned int headroom) -{ - struct sk_buff *skb2; - int delta = headroom - skb_headroom(skb); - - if (delta <= 0) - skb2 = pskb_copy(skb, GFP_ATOMIC); - else { - skb2 = skb_clone(skb, GFP_ATOMIC); - if (skb2 && pskb_expand_head(skb2, SKB_DATA_ALIGN(delta), 0, - GFP_ATOMIC)) { - kfree_skb(skb2); - skb2 = NULL; - } - } - return skb2; -} - - -/** - * skb_copy_expand - copy and expand sk_buff - * @skb: buffer to copy - * @newheadroom: new free bytes at head - * @newtailroom: new free bytes at tail - * @gfp_mask: allocation priority - * - * Make a copy of both an &sk_buff and its data and while doing so - * allocate additional space. - * - * This is used when the caller wishes to modify the data and needs a - * private copy of the data to alter as well as more space for new fields. - * Returns %NULL on failure or the pointer to the buffer - * on success. The returned buffer has a reference count of 1. - * - * You must pass %GFP_ATOMIC as the allocation priority if this function - * is called from an interrupt. - * - * BUG ALERT: ip_summed is not copied. Why does this work? Is it used - * only by netfilter in the cases when checksum is recalculated? 
--ANK - */ -struct sk_buff *skb_copy_expand(const struct sk_buff *skb, - int newheadroom, int newtailroom, int gfp_mask) -{ - /* - * Allocate the copy buffer - */ - struct sk_buff *n = alloc_skb(newheadroom + skb->len + newtailroom, - gfp_mask); - int head_copy_len, head_copy_off; - - if (!n) - return NULL; - - skb_reserve(n, newheadroom); - - /* Set the tail pointer and length */ - skb_put(n, skb->len); - - head_copy_len = skb_headroom(skb); - head_copy_off = 0; - if (newheadroom <= head_copy_len) - head_copy_len = newheadroom; - else - head_copy_off = newheadroom - head_copy_len; - - /* Copy the linear header and data. */ - if (skb_copy_bits(skb, -head_copy_len, n->head + head_copy_off, - skb->len + head_copy_len)) - BUG(); - - copy_skb_header(n, skb); - - return n; -} - -/** - * skb_pad - zero pad the tail of an skb - * @skb: buffer to pad - * @pad: space to pad - * - * Ensure that a buffer is followed by a padding area that is zero - * filled. Used by network drivers which may DMA or transfer data - * beyond the buffer end onto the wire. - * - * May return NULL in out of memory cases. - */ - -struct sk_buff *skb_pad(struct sk_buff *skb, int pad) -{ - struct sk_buff *nskb; - - /* If the skbuff is non linear tailroom is always zero.. */ - if (skb_tailroom(skb) >= pad) { - memset(skb->data+skb->len, 0, pad); - return skb; - } - - nskb = skb_copy_expand(skb, skb_headroom(skb), skb_tailroom(skb) + pad, GFP_ATOMIC); - kfree_skb(skb); - if (nskb) - memset(nskb->data+nskb->len, 0, pad); - return nskb; -} - -/* Trims skb to length len. It can change skb pointers, if "realloc" is 1. - * If realloc==0 and trimming is impossible without change of data, - * it is BUG(). 
- */ - -int ___pskb_trim(struct sk_buff *skb, unsigned int len, int realloc) -{ - int offset = skb_headlen(skb); - int nfrags = skb_shinfo(skb)->nr_frags; - int i; - - for (i = 0; i < nfrags; i++) { - int end = offset + skb_shinfo(skb)->frags[i].size; - if (end > len) { - if (skb_cloned(skb)) { - if (!realloc) - BUG(); - if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) - return -ENOMEM; - } - if (len <= offset) { - put_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb)->nr_frags--; - } else { - skb_shinfo(skb)->frags[i].size = len - offset; - } - } - offset = end; - } - - if (offset < len) { - skb->data_len -= skb->len - len; - skb->len = len; - } else { - if (len <= skb_headlen(skb)) { - skb->len = len; - skb->data_len = 0; - skb->tail = skb->data + len; - if (skb_shinfo(skb)->frag_list && !skb_cloned(skb)) - skb_drop_fraglist(skb); - } else { - skb->data_len -= skb->len - len; - skb->len = len; - } - } - - return 0; -} - -/** - * __pskb_pull_tail - advance tail of skb header - * @skb: buffer to reallocate - * @delta: number of bytes to advance tail - * - * The function makes a sense only on a fragmented &sk_buff, - * it expands header moving its tail forward and copying necessary - * data from fragmented part. - * - * &sk_buff MUST have reference count of 1. - * - * Returns %NULL (and &sk_buff does not change) if pull failed - * or value of new tail of skb in the case of success. - * - * All the pointers pointing into skb header may change and must be - * reloaded after call to this function. - */ - -/* Moves tail of skb head forward, copying data from fragmented part, - * when it is necessary. - * 1. It may fail due to malloc failure. - * 2. It may change skb pointers. - * - * It is pretty complicated. Luckily, it is called only in exceptional cases. - */ -unsigned char *__pskb_pull_tail(struct sk_buff *skb, int delta) -{ - /* If skb has not enough free space at tail, get new one - * plus 128 bytes for future expansions. 
If we have enough - * room at tail, reallocate without expansion only if skb is cloned. - */ - int i, k, eat = (skb->tail + delta) - skb->end; - - if (eat > 0 || skb_cloned(skb)) { - if (pskb_expand_head(skb, 0, eat > 0 ? eat + 128 : 0, - GFP_ATOMIC)) - return NULL; - } - - if (skb_copy_bits(skb, skb_headlen(skb), skb->tail, delta)) - BUG(); - - /* Optimization: no fragments, no reasons to preestimate - * size of pulled pages. Superb. - */ - if (!skb_shinfo(skb)->frag_list) - goto pull_pages; - - /* Estimate size of pulled pages. */ - eat = delta; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size >= eat) - goto pull_pages; - eat -= skb_shinfo(skb)->frags[i].size; - } - - /* If we need update frag list, we are in troubles. - * Certainly, it possible to add an offset to skb data, - * but taking into account that pulling is expected to - * be very rare operation, it is worth to fight against - * further bloating skb head and crucify ourselves here instead. - * Pure masohism, indeed. 8)8) - */ - if (eat) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - struct sk_buff *clone = NULL; - struct sk_buff *insp = NULL; - - do { - if (!list) - BUG(); - - if (list->len <= eat) { - /* Eaten as whole. */ - eat -= list->len; - list = list->next; - insp = list; - } else { - /* Eaten partially. */ - - if (skb_shared(list)) { - /* Sucks! We need to fork list. :-( */ - clone = skb_clone(list, GFP_ATOMIC); - if (!clone) - return NULL; - insp = list->next; - list = clone; - } else { - /* This may be pulled without - * problems. */ - insp = list; - } - if (!pskb_pull(list, eat)) { - if (clone) - kfree_skb(clone); - return NULL; - } - break; - } - } while (eat); - - /* Free pulled out fragments. */ - while ((list = skb_shinfo(skb)->frag_list) != insp) { - skb_shinfo(skb)->frag_list = list->next; - kfree_skb(list); - } - /* And insert new clone at head. 
*/ - if (clone) { - clone->next = list; - skb_shinfo(skb)->frag_list = clone; - } - } - /* Success! Now we may commit changes to skb data. */ - -pull_pages: - eat = delta; - k = 0; - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - if (skb_shinfo(skb)->frags[i].size <= eat) { - put_page(skb_shinfo(skb)->frags[i].page); - eat -= skb_shinfo(skb)->frags[i].size; - } else { - skb_shinfo(skb)->frags[k] = skb_shinfo(skb)->frags[i]; - if (eat) { - skb_shinfo(skb)->frags[k].page_offset += eat; - skb_shinfo(skb)->frags[k].size -= eat; - eat = 0; - } - k++; - } - } - skb_shinfo(skb)->nr_frags = k; - - skb->tail += delta; - skb->data_len -= delta; - - return skb->tail; -} - -/* Copy some data bits from skb to kernel buffer. */ - -int skb_copy_bits(const struct sk_buff *skb, int offset, void *to, int len) -{ - int i, copy; - int start = skb_headlen(skb); - - if (offset > (int)skb->len - len) - goto fault; - - /* Copy header. */ - if ((copy = start - offset) > 0) { - if (copy > len) - copy = len; - memcpy(to, skb->data + offset, copy); - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - u8 *vaddr; - - if (copy > len) - copy = len; - - vaddr = kmap_skb_frag(&skb_shinfo(skb)->frags[i]); - memcpy(to, - vaddr + skb_shinfo(skb)->frags[i].page_offset+ - offset - start, copy); - kunmap_skb_frag(vaddr); - - if ((len -= copy) == 0) - return 0; - offset += copy; - to += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - if (skb_copy_bits(list, offset - start, - to, copy)) - goto fault; - if ((len -= copy) == 0) - return 0; - 
offset += copy; - to += copy; - } - start = end; - } - } - if (!len) - return 0; - -fault: - return -EFAULT; -} - -/* Keep iterating until skb_iter_next returns false. */ -void skb_iter_first(const struct sk_buff *skb, struct skb_iter *i) -{ - i->len = skb_headlen(skb); - i->data = (unsigned char *)skb->data; - i->nextfrag = 0; - i->fraglist = NULL; -} - -int skb_iter_next(const struct sk_buff *skb, struct skb_iter *i) -{ - /* Unmap previous, if not head fragment. */ - if (i->nextfrag) - kunmap_skb_frag(i->data); - - if (i->fraglist) { - fraglist: - /* We're iterating through fraglist. */ - if (i->nextfrag < skb_shinfo(i->fraglist)->nr_frags) { - i->data = kmap_skb_frag(&skb_shinfo(i->fraglist) - ->frags[i->nextfrag]); - i->len = skb_shinfo(i->fraglist)->frags[i->nextfrag] - .size; - i->nextfrag++; - return 1; - } - /* Fragments with fragments? Too hard! */ - BUG_ON(skb_shinfo(i->fraglist)->frag_list); - i->fraglist = i->fraglist->next; - if (!i->fraglist) - goto end; - - i->len = skb_headlen(i->fraglist); - i->data = i->fraglist->data; - i->nextfrag = 0; - return 1; - } - - if (i->nextfrag < skb_shinfo(skb)->nr_frags) { - i->data = kmap_skb_frag(&skb_shinfo(skb)->frags[i->nextfrag]); - i->len = skb_shinfo(skb)->frags[i->nextfrag].size; - i->nextfrag++; - return 1; - } - - i->fraglist = skb_shinfo(skb)->frag_list; - if (i->fraglist) - goto fraglist; - -end: - /* Bug trap for callers */ - i->data = NULL; - return 0; -} - -void skb_iter_abort(const struct sk_buff *skb, struct skb_iter *i) -{ - /* Unmap previous, if not head fragment. */ - if (i->data && i->nextfrag) - kunmap_skb_frag(i->data); - /* Bug trap for callers */ - i->data = NULL; -} - -/* Checksum skb data. */ - -unsigned int skb_checksum(const struct sk_buff *skb, int offset, - int len, unsigned int csum) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int pos = 0; - - /* Checksum header. 
*/ - if (copy > 0) { - if (copy > len) - copy = len; - csum = csum_partial(skb->data + offset, copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos = copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - unsigned int csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial(vaddr + frag->page_offset + - offset - start, copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - unsigned int csum2; - if (copy > len) - copy = len; - csum2 = skb_checksum(list, offset - start, - copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - pos += copy; - } - start = end; - } - } - if (len) - BUG(); - - return csum; -} - -/* Both of above in one bottle. */ - -unsigned int skb_copy_and_csum_bits(const struct sk_buff *skb, int offset, - u8 *to, int len, unsigned int csum) -{ - int start = skb_headlen(skb); - int i, copy = start - offset; - int pos = 0; - - /* Copy header. 
*/ - if (copy > 0) { - if (copy > len) - copy = len; - csum = csum_partial_copy_nocheck(skb->data + offset, to, - copy, csum); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos = copy; - } - - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) { - int end; - - BUG_TRAP(start <= offset + len); - - end = start + skb_shinfo(skb)->frags[i].size; - if ((copy = end - offset) > 0) { - unsigned int csum2; - u8 *vaddr; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; - - if (copy > len) - copy = len; - vaddr = kmap_skb_frag(frag); - csum2 = csum_partial_copy_nocheck(vaddr + - frag->page_offset + - offset - start, to, - copy, 0); - kunmap_skb_frag(vaddr); - csum = csum_block_add(csum, csum2, pos); - if (!(len -= copy)) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - - if (skb_shinfo(skb)->frag_list) { - struct sk_buff *list = skb_shinfo(skb)->frag_list; - - for (; list; list = list->next) { - unsigned int csum2; - int end; - - BUG_TRAP(start <= offset + len); - - end = start + list->len; - if ((copy = end - offset) > 0) { - if (copy > len) - copy = len; - csum2 = skb_copy_and_csum_bits(list, - offset - start, - to, copy, 0); - csum = csum_block_add(csum, csum2, pos); - if ((len -= copy) == 0) - return csum; - offset += copy; - to += copy; - pos += copy; - } - start = end; - } - } - if (len) - BUG(); - return csum; -} - -void skb_copy_and_csum_dev(const struct sk_buff *skb, u8 *to) -{ - unsigned int csum; - long csstart; - - if (skb->ip_summed == CHECKSUM_HW) - csstart = skb->h.raw - skb->data; - else - csstart = skb_headlen(skb); - - if (csstart > skb_headlen(skb)) - BUG(); - - memcpy(to, skb->data, csstart); - - csum = 0; - if (csstart != skb->len) - csum = skb_copy_and_csum_bits(skb, csstart, to + csstart, - skb->len - csstart, 0); - - if (skb->ip_summed == CHECKSUM_HW) { - long csstuff = csstart + skb->csum; - - *((unsigned short *)(to + csstuff)) = csum_fold(csum); - } -} - -/** - * skb_dequeue - remove 
from the head of the queue - * @list: list to dequeue from - * - * Remove the head of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The head item is - * returned or %NULL if the list is empty. - */ - -struct sk_buff *skb_dequeue(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/** - * skb_dequeue_tail - remove from the tail of the queue - * @list: list to dequeue from - * - * Remove the tail of the list. The list lock is taken so the function - * may be used safely with other locking list functions. The tail item is - * returned or %NULL if the list is empty. - */ -struct sk_buff *skb_dequeue_tail(struct sk_buff_head *list) -{ - unsigned long flags; - struct sk_buff *result; - - spin_lock_irqsave(&list->lock, flags); - result = __skb_dequeue_tail(list); - spin_unlock_irqrestore(&list->lock, flags); - return result; -} - -/** - * skb_queue_purge - empty a list - * @list: list to empty - * - * Delete all buffers on an &sk_buff list. Each buffer is removed from - * the list and one reference dropped. This function takes the list - * lock and is atomic with respect to other list locking functions. - */ -void skb_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - while ((skb = skb_dequeue(list)) != NULL) - kfree_skb(skb); -} - -/** - * skb_queue_head - queue a buffer at the list head - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the start of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. 
- */ -void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_head(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} - -/** - * skb_queue_tail - queue a buffer at the list tail - * @list: list to use - * @newsk: buffer to queue - * - * Queue a buffer at the tail of the list. This function takes the - * list lock and can be used safely with other locking &sk_buff functions - * safely. - * - * A buffer cannot be placed on two lists at the same time. - */ -void skb_queue_tail(struct sk_buff_head *list, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - __skb_queue_tail(list, newsk); - spin_unlock_irqrestore(&list->lock, flags); -} -/** - * skb_unlink - remove a buffer from a list - * @skb: buffer to remove - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls - * - * Works even without knowing the list it is sitting on, which can be - * handy at times. It also means that THE LIST MUST EXIST when you - * unlink. Thus a list must have its contents unlinked before it is - * destroyed. - */ -void skb_unlink(struct sk_buff *skb) -{ - struct sk_buff_head *list = skb->list; - - if (list) { - unsigned long flags; - - spin_lock_irqsave(&list->lock, flags); - if (skb->list == list) - __skb_unlink(skb, skb->list); - spin_unlock_irqrestore(&list->lock, flags); - } -} - - -/** - * skb_append - append a buffer - * @old: buffer to insert after - * @newsk: buffer to insert - * - * Place a packet after a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls. - * A buffer cannot be placed on two lists at the same time. 
- */ - -void skb_append(struct sk_buff *old, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&old->list->lock, flags); - __skb_append(old, newsk); - spin_unlock_irqrestore(&old->list->lock, flags); -} - - -/** - * skb_insert - insert a buffer - * @old: buffer to insert before - * @newsk: buffer to insert - * - * Place a packet before a given packet in a list. The list locks are taken - * and this function is atomic with respect to other list locked calls - * A buffer cannot be placed on two lists at the same time. - */ - -void skb_insert(struct sk_buff *old, struct sk_buff *newsk) -{ - unsigned long flags; - - spin_lock_irqsave(&old->list->lock, flags); - __skb_insert(newsk, old->prev, old, old->list); - spin_unlock_irqrestore(&old->list->lock, flags); -} - -#if 0 -/* - * Tune the memory allocator for a new MTU size. - */ -void skb_add_mtu(int mtu) -{ - /* Must match allocation in alloc_skb */ - mtu = SKB_DATA_ALIGN(mtu) + sizeof(struct skb_shared_info); - - kmem_add_cache_size(mtu); -} -#endif - -static inline void skb_split_inside_header(struct sk_buff *skb, - struct sk_buff* skb1, - const u32 len, const int pos) -{ - int i; - - memcpy(skb_put(skb1, pos - len), skb->data + len, pos - len); - - /* And move data appendix as is. 
*/ - for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) - skb_shinfo(skb1)->frags[i] = skb_shinfo(skb)->frags[i]; - - skb_shinfo(skb1)->nr_frags = skb_shinfo(skb)->nr_frags; - skb_shinfo(skb)->nr_frags = 0; - skb1->data_len = skb->data_len; - skb1->len += skb1->data_len; - skb->data_len = 0; - skb->len = len; - skb->tail = skb->data + len; -} - -static inline void skb_split_no_header(struct sk_buff *skb, - struct sk_buff* skb1, - const u32 len, int pos) -{ - int i, k = 0; - const int nfrags = skb_shinfo(skb)->nr_frags; - - skb_shinfo(skb)->nr_frags = 0; - skb1->len = skb1->data_len = skb->len - len; - skb->len = len; - skb->data_len = len - pos; - - for (i = 0; i < nfrags; i++) { - int size = skb_shinfo(skb)->frags[i].size; - - if (pos + size > len) { - skb_shinfo(skb1)->frags[k] = skb_shinfo(skb)->frags[i]; - - if (pos < len) { - /* Split frag. - * We have to variants in this case: - * 1. Move all the frag to the second - * part, if it is possible. F.e. - * this approach is mandatory for TUX, - * where splitting is expensive. - * 2. Split is accurately. We make this. - */ - get_page(skb_shinfo(skb)->frags[i].page); - skb_shinfo(skb1)->frags[0].page_offset += len - pos; - skb_shinfo(skb1)->frags[0].size -= len - pos; - skb_shinfo(skb)->frags[i].size = len - pos; - skb_shinfo(skb)->nr_frags++; - } - k++; - } else - skb_shinfo(skb)->nr_frags++; - pos += size; - } - skb_shinfo(skb1)->nr_frags = k; -} - -/** - * skb_split - Split fragmented skb to two parts at length len. - */ -void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) -{ - int pos = skb_headlen(skb); - - if (len < pos) /* Split line is inside header. */ - skb_split_inside_header(skb, skb1, len, pos); - else /* Second chunk has no header, nothing to copy. 
*/ - skb_split_no_header(skb, skb1, len, pos); -} - -void __init skb_init(void) -{ - skbuff_head_cache = kmem_cache_create("skbuff_head_cache", - sizeof(struct sk_buff), - 0, - SLAB_HWCACHE_ALIGN, - NULL, NULL); - if (!skbuff_head_cache) - panic("cannot create skbuff cache"); -} - -EXPORT_SYMBOL(___pskb_trim); -EXPORT_SYMBOL(__kfree_skb); -EXPORT_SYMBOL(__pskb_pull_tail); -EXPORT_SYMBOL(alloc_skb); -EXPORT_SYMBOL(pskb_copy); -EXPORT_SYMBOL(pskb_expand_head); -EXPORT_SYMBOL(skb_checksum); -EXPORT_SYMBOL(skb_clone); -EXPORT_SYMBOL(skb_clone_fraglist); -EXPORT_SYMBOL(skb_copy); -EXPORT_SYMBOL(skb_copy_and_csum_bits); -EXPORT_SYMBOL(skb_copy_and_csum_dev); -EXPORT_SYMBOL(skb_copy_bits); -EXPORT_SYMBOL(skb_copy_expand); -EXPORT_SYMBOL(skb_over_panic); -EXPORT_SYMBOL(skb_pad); -EXPORT_SYMBOL(skb_realloc_headroom); -EXPORT_SYMBOL(skb_under_panic); -EXPORT_SYMBOL(skb_dequeue); -EXPORT_SYMBOL(skb_dequeue_tail); -EXPORT_SYMBOL(skb_insert); -EXPORT_SYMBOL(skb_queue_purge); -EXPORT_SYMBOL(skb_queue_head); -EXPORT_SYMBOL(skb_queue_tail); -EXPORT_SYMBOL(skb_unlink); -EXPORT_SYMBOL(skb_append); -EXPORT_SYMBOL(skb_split); -EXPORT_SYMBOL(skb_iter_first); -EXPORT_SYMBOL(skb_iter_next); -EXPORT_SYMBOL(skb_iter_abort); diff --git a/linux-2.6.10-xen-sparse/arch/xen/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/Kconfig similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/Kconfig rename to linux-2.6.11-xen-sparse/arch/xen/Kconfig diff --git a/linux-2.6.10-xen-sparse/arch/xen/Kconfig.drivers b/linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/Kconfig.drivers rename to linux-2.6.11-xen-sparse/arch/xen/Kconfig.drivers diff --git a/linux-2.6.10-xen-sparse/arch/xen/Makefile b/linux-2.6.11-xen-sparse/arch/xen/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/boot/Makefile 
b/linux-2.6.11-xen-sparse/arch/xen/boot/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/boot/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/boot/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/configs/xen0_defconfig b/linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/configs/xen0_defconfig rename to linux-2.6.11-xen-sparse/arch/xen/configs/xen0_defconfig diff --git a/linux-2.6.10-xen-sparse/arch/xen/configs/xenU_defconfig b/linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/configs/xenU_defconfig rename to linux-2.6.11-xen-sparse/arch/xen/configs/xenU_defconfig diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/Kconfig b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig similarity index 98% rename from linux-2.6.10-xen-sparse/arch/xen/i386/Kconfig rename to linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig index 7a3a73d869..f6dd7b5203 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/Kconfig +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/Kconfig @@ -253,6 +253,10 @@ config RWSEM_XCHGADD_ALGORITHM depends on !M386 default y +config GENERIC_CALIBRATE_DELAY + bool + default y + config X86_PPRO_FENCE bool depends on M686 || M586MMX || M586TSC || M586 || M486 || M386 @@ -398,6 +402,17 @@ config PREEMPT Say Y here if you are building a kernel for a desktop, embedded or real-time system. Say N if you are unsure. +config PREEMPT_BKL + bool "Preempt The Big Kernel Lock" + depends on PREEMPT + default y + help + This option reduces the latency of the kernel by making the + big kernel lock preemptible. + + Say Y here if you are building a kernel for a desktop system. + Say N if you are unsure. 
+ #config X86_TSC # bool # depends on (MWINCHIP3D || MWINCHIP2 || MCRUSOE || MEFFICEON || MCYRIXIII || MK7 || MK6 || MPENTIUM4 || MPENTIUMM || MPENTIUMIII || MPENTIUMII || M686 || M586MMX || M586TSC || MK8 || MVIAC3_2) && !X86_NUMAQ @@ -638,7 +653,7 @@ config REGPARM depends on EXPERIMENTAL default n help - Compile the kernel with -mregparm=3. This uses an different ABI + Compile the kernel with -mregparm=3. This uses a different ABI and passes the first three arguments of a function call in registers. This will probably break binary only modules. @@ -729,6 +744,8 @@ config PCI_DIRECT depends on PCI default y +source "drivers/pci/pcie/Kconfig" + source "drivers/pci/Kconfig" config ISA diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/i386/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/common.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c similarity index 89% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/common.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c index a29a7f4e97..dc60f9d677 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/common.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/common.c @@ -10,6 +10,11 @@ #include #include 
#include +#ifdef CONFIG_X86_LOCAL_APIC +#include +#include +#include +#endif #include #include "cpu.h" @@ -277,8 +282,10 @@ void __init generic_identify(struct cpuinfo_x86 * c) /* AMD-defined flags: level 0x80000001 */ xlvl = cpuid_eax(0x80000000); if ( (xlvl & 0xffff0000) == 0x80000000 ) { - if ( xlvl >= 0x80000001 ) + if ( xlvl >= 0x80000001 ) { c->x86_capability[1] = cpuid_edx(0x80000001); + c->x86_capability[6] = cpuid_ecx(0x80000001); + } if ( xlvl >= 0x80000004 ) get_model_name(c); /* Default name */ } @@ -324,6 +331,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... */ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ + c->x86_num_cores = 1; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -337,21 +345,19 @@ void __init identify_cpu(struct cpuinfo_x86 *c) generic_identify(c); - printk(KERN_DEBUG "CPU: After generic identify, caps: %08lx %08lx %08lx %08lx\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); + printk(KERN_DEBUG "CPU: After generic identify, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); if (this_cpu->c_identify) { this_cpu->c_identify(c); - printk(KERN_DEBUG "CPU: After vendor identify, caps: %08lx %08lx %08lx %08lx\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); -} + printk(KERN_DEBUG "CPU: After vendor identify, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); + } /* * Vendor-specific initialization. In this section we @@ -403,11 +409,10 @@ void __init identify_cpu(struct cpuinfo_x86 *c) /* Now the feature flags better reflect actual CPU features! 
*/ - printk(KERN_DEBUG "CPU: After all inits, caps: %08lx %08lx %08lx %08lx\n", - c->x86_capability[0], - c->x86_capability[1], - c->x86_capability[2], - c->x86_capability[3]); + printk(KERN_DEBUG "CPU: After all inits, caps:"); + for (i = 0; i < NCAPINTS; i++) + printk(" %08lx", c->x86_capability[i]); + printk("\n"); /* * On SMP, boot_cpu_data holds the common feature set between @@ -437,6 +442,50 @@ void __init dodgy_tsc(void) cpu_devs[X86_VENDOR_CYRIX]->c_init(&boot_cpu_data); } +#ifdef CONFIG_X86_HT +void __init detect_ht(struct cpuinfo_x86 *c) +{ + u32 eax, ebx, ecx, edx; + int index_lsb, index_msb, tmp; + int cpu = smp_processor_id(); + + if (!cpu_has(c, X86_FEATURE_HT)) + return; + + cpuid(1, &eax, &ebx, &ecx, &edx); + smp_num_siblings = (ebx & 0xff0000) >> 16; + + if (smp_num_siblings == 1) { + printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); + } else if (smp_num_siblings > 1 ) { + index_lsb = 0; + index_msb = 31; + + if (smp_num_siblings > NR_CPUS) { + printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); + smp_num_siblings = 1; + return; + } + tmp = smp_num_siblings; + while ((tmp & 1) == 0) { + tmp >>=1 ; + index_lsb++; + } + tmp = smp_num_siblings; + while ((tmp & 0x80000000 ) == 0) { + tmp <<=1 ; + index_msb--; + } + if (index_lsb != index_msb ) + index_msb++; + phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + + printk(KERN_INFO "CPU: Physical Processor ID: %d\n", + phys_proc_id[cpu]); + } +} +#endif + void __init print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -460,7 +509,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -unsigned long cpu_initialized __initdata = 0; +cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. 
@@ -533,7 +582,7 @@ void __init cpu_init (void) struct tss_struct * t = &per_cpu(init_tss, cpu); struct thread_struct *thread = ¤t->thread; - if (test_and_set_bit(cpu, &cpu_initialized)) { + if (cpu_test_and_set(cpu, cpu_initialized)) { printk(KERN_WARNING "CPU#%d already initialized!\n", cpu); for (;;) local_irq_enable(); } @@ -601,6 +650,6 @@ void __init cpu_init (void) * Force FPU initialization: */ current_thread_info()->status = 0; - current->used_math = 0; + clear_used_math(); mxcsr_feature_mask_init(); } diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/cpu/mtrr/main.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/entry.S b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S similarity index 99% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/entry.S rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S index 321f89e3d4..af2fad5236 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/entry.S +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/entry.S @@ -198,6 +198,7 @@ ENTRY(resume_userspace) #ifdef CONFIG_PREEMPT ENTRY(resume_kernel) XEN_GET_VCPU_INFO(%esi) + XEN_BLOCK_EVENTS(%esi) cmpl $0,TI_preempt_count(%ebp) # non-zero preempt_count ? jnz restore_all need_resched: @@ -206,11 +207,7 @@ need_resched: jz restore_all testb $0xFF,EVENT_MASK(%esp) # interrupts off (exception path) ? 
jnz restore_all - movl $PREEMPT_ACTIVE,TI_preempt_count(%ebp) - XEN_UNBLOCK_EVENTS(%esi) - call schedule - XEN_BLOCK_EVENTS(%esi) - movl $0,TI_preempt_count(%ebp) + call preempt_schedule_irq jmp need_resched #endif diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/head.S b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/head.S rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/head.S diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c similarity index 98% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c index 90c816717a..0a326e4af6 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/i386_ksyms.c @@ -61,7 +61,6 @@ extern unsigned long get_cmos_time(void); /* platform dependent support */ EXPORT_SYMBOL(boot_cpu_data); -EXPORT_SYMBOL(MCA_bus); #ifdef CONFIG_DISCONTIGMEM EXPORT_SYMBOL(node_data); EXPORT_SYMBOL(physnode_map); @@ -75,7 +74,6 @@ EXPORT_SYMBOL_GPL(kernel_fpu_begin); EXPORT_SYMBOL(__ioremap); EXPORT_SYMBOL(ioremap_nocache); EXPORT_SYMBOL(iounmap); -EXPORT_SYMBOL(probe_irq_mask); EXPORT_SYMBOL(kernel_thread); EXPORT_SYMBOL(pm_idle); #ifdef CONFIG_APM diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ioport.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ioport.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ioport.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ldt.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/ldt.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/ldt.c diff --git 
a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/microcode.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/microcode.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/microcode.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/microcode.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/pci-dma.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c similarity index 93% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/pci-dma.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c index c346281405..f93554394d 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/pci-dma.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/pci-dma.c @@ -36,6 +36,7 @@ xen_contig_memory(unsigned long vstart, unsigned int order) * hypercalls reduced. */ pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; unsigned long pfn, i, flags; @@ -47,7 +48,8 @@ xen_contig_memory(unsigned long vstart, unsigned int order) /* 1. Zap current PTEs, giving away the underlying pages. */ for (i = 0; i < (1<pte_low >> PAGE_SHIFT; queue_l1_entry_update(pte, 0); @@ -63,7 +65,8 @@ xen_contig_memory(unsigned long vstart, unsigned int order) /* 3. Map the new extent in place of old pages. 
*/ for (i = 0; i < (1<dma_mem = kmalloc(GFP_KERNEL, sizeof(struct dma_coherent_mem)); + dev->dma_mem = kmalloc(sizeof(struct dma_coherent_mem), GFP_KERNEL); if (!dev->dma_mem) goto out; memset(dev->dma_mem, 0, sizeof(struct dma_coherent_mem)); - dev->dma_mem->bitmap = kmalloc(GFP_KERNEL, bitmap_size); + dev->dma_mem->bitmap = kmalloc(bitmap_size, GFP_KERNEL); if (!dev->dma_mem->bitmap) goto free1_out; memset(dev->dma_mem->bitmap, 0, bitmap_size); @@ -206,6 +209,7 @@ void dma_release_declared_memory(struct device *dev) if(!mem) return; dev->dma_mem = NULL; + iounmap(mem->virt_base); kfree(mem->bitmap); kfree(mem); } diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c similarity index 97% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c index 26dee14c3a..6500ff9a2c 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/process.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/process.c @@ -74,6 +74,7 @@ unsigned long thread_saved_pc(struct task_struct *tsk) * Powermanagement idle function, if any.. */ void (*pm_idle)(void); +static cpumask_t cpu_idle_map; void disable_hlt(void) { @@ -119,23 +120,39 @@ void xen_idle(void) */ void cpu_idle (void) { + int cpu = _smp_processor_id(); + /* endless idle loop with no priority at all */ while (1) { while (!need_resched()) { - /* - * Mark this as an RCU critical section so that - * synchronize_kernel() in the unload path waits - * for our completion. 
- */ - rcu_read_lock(); - irq_stat[smp_processor_id()].idle_timestamp = jiffies; + + if (cpu_isset(cpu, cpu_idle_map)) + cpu_clear(cpu, cpu_idle_map); + rmb(); + + irq_stat[cpu].idle_timestamp = jiffies; xen_idle(); - rcu_read_unlock(); } schedule(); } } +void cpu_idle_wait(void) +{ + int cpu; + cpumask_t map; + + for_each_online_cpu(cpu) + cpu_set(cpu, cpu_idle_map); + + wmb(); + do { + ssleep(1); + cpus_and(map, cpu_idle_map, cpu_online_map); + } while (!cpus_empty(map)); +} +EXPORT_SYMBOL_GPL(cpu_idle_wait); + /* XXX XEN doesn't use mwait_idle(), select_idle_routine(), idle_setup(). */ /* Always use xen_idle() instead. */ void __init select_idle_routine(const struct cpuinfo_x86 *c) {} @@ -237,7 +254,7 @@ void flush_thread(void) * Forget coprocessor state.. */ clear_fpu(tsk); - tsk->used_math = 0; + clear_used_math(); } void release_thread(struct task_struct *dead_task) diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/setup.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c similarity index 96% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/setup.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c index ab77b119b1..9b892b04ba 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/setup.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/setup.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -86,7 +87,6 @@ int __initdata acpi_force = 0; extern acpi_interrupt_flags acpi_sci_flags; #endif -int MCA_bus; /* for MCA, but anyone else can use it if they want */ unsigned int machine_id; unsigned int machine_submodel_id; @@ -96,6 +96,9 @@ unsigned int mca_pentium_flag; /* For PCI or other memory-mapped resources */ unsigned long pci_mem_start = 0x10000000; +/* Boot loader ID as an integer, for the benefit of proc_dointvec */ +int bootloader_type; + /* user-defined highmem size */ static unsigned int highmem_pages = -1; @@ -694,6 +697,8 @@ static void __init parse_cmdline_early (char ** 
cmdline_p) saved_command_line[COMMAND_LINE_SIZE-1] = '\0'; for (;;) { + if (c != ' ') + goto next_char; /* * "mem=nopentium" disables the 4MB page tables. * "mem=XXX[kKmM]" defines a memory region from HIGH_MEM @@ -704,7 +709,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) * HPA tells me bootloaders need to parse mem=, so no new * option should be mem= [also see Documentation/i386/boot.txt] */ - if (c == ' ' && !memcmp(from, "mem=", 4)) { + if (!memcmp(from, "mem=", 4)) { if (to != command_line) to--; if (!memcmp(from+4, "nopentium", 9)) { @@ -731,7 +736,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) } } - if (c == ' ' && !memcmp(from, "memmap=", 7)) { + else if (!memcmp(from, "memmap=", 7)) { if (to != command_line) to--; if (!memcmp(from+7, "exactmap", 8)) { @@ -764,6 +769,10 @@ static void __init parse_cmdline_early (char ** cmdline_p) } } + else if (!memcmp(from, "noexec=", 7)) + noexec_setup(from + 7); + + #ifdef CONFIG_X86_SMP /* * If the BIOS enumerates physical processors before logical, @@ -839,7 +848,7 @@ static void __init parse_cmdline_early (char ** cmdline_p) * This works even on boxes that have no highmem otherwise. * This also works to reduce highmem size on bigger boxes. */ - if (c == ' ' && !memcmp(from, "highmem=", 8)) + else if (!memcmp(from, "highmem=", 8)) highmem_pages = memparse(from+8, &from) >> PAGE_SHIFT; /* @@ -847,9 +856,10 @@ static void __init parse_cmdline_early (char ** cmdline_p) * bytes. This can be used to increase (or decrease) the * vmalloc area - the default is 128m. 
*/ - if (c == ' ' && !memcmp(from, "vmalloc=", 8)) + else if (!memcmp(from, "vmalloc=", 8)) __VMALLOC_RESERVE = memparse(from+8, &from); + next_char: c = *(from++); if (!c) break; @@ -1191,9 +1201,10 @@ legacy_init_iomem_resources(struct resource *code_resource, struct resource *dat /* * Request address space for all standard resources */ -static void __init register_memory(unsigned long max_low_pfn) +static void __init register_memory(void) { - unsigned long low_mem_size; + unsigned long gapstart, gapsize; + unsigned long long last; int i; if (efi_enabled) @@ -1208,10 +1219,46 @@ static void __init register_memory(unsigned long max_low_pfn) for (i = 0; i < STANDARD_IO_RESOURCES; i++) request_resource(&ioport_resource, &standard_io_resources[i]); - /* Tell the PCI layer not to allocate too close to the RAM area.. */ - low_mem_size = ((max_low_pfn << PAGE_SHIFT) + 0xfffff) & ~0xfffff; - if (low_mem_size > pci_mem_start) - pci_mem_start = low_mem_size; + /* + * Search for the bigest gap in the low 32 bits of the e820 + * memory space. + */ + last = 0x100000000ull; + gapstart = 0x10000000; + gapsize = 0x400000; + i = e820.nr_map; + while (--i >= 0) { + unsigned long long start = e820.map[i].addr; + unsigned long long end = start + e820.map[i].size; + + /* + * Since "last" is at most 4GB, we know we'll + * fit in 32 bits if this condition is true + */ + if (last > end) { + unsigned long gap = last - end; + + if (gap > gapsize) { + gapsize = gap; + gapstart = end; + } + } + if (start < last) + last = start; + } + + /* + * Start allocating dynamic PCI memory a bit into the gap, + * aligned up to the nearest megabyte. + * + * Question: should we try to pad it up a bit (do something + * like " + (gapsize >> 3)" in there too?). We now have the + * technology. 
+ */ + pci_mem_start = (gapstart + 0xfffff) & ~0xfffff; + + printk("Allocating PCI resources starting at %08lx (gap: %08lx:%08lx)\n", + pci_mem_start, gapstart, gapsize); } /* Use inline assembly to define this because the nops are defined @@ -1323,6 +1370,15 @@ __setup("noreplacement", noreplacement_setup); static char * __init machine_specific_memory_setup(void); +#ifdef CONFIG_MCA +static void set_mca_bus(int x) +{ + MCA_bus = x; +} +#else +static void set_mca_bus(int x) { } +#endif + /* * Determine if we were loaded by an EFI loader. If so, then we have also been * passed the efi memmap, systab, etc., so we should use these data structures @@ -1370,12 +1426,13 @@ void __init setup_arch(char **cmdline_p) ist_info = IST_INFO; saved_videomode = VIDEO_MODE; if( SYS_DESC_TABLE.length != 0 ) { - MCA_bus = SYS_DESC_TABLE.table[3] &0x2; + set_mca_bus(SYS_DESC_TABLE.table[3] & 0x2); machine_id = SYS_DESC_TABLE.table[0]; machine_submodel_id = SYS_DESC_TABLE.table[1]; BIOS_revision = SYS_DESC_TABLE.table[2]; } aux_device_present = AUX_DEVICE_INFO; + bootloader_type = LOADER_TYPE; #ifdef CONFIG_XEN_PHYSDEV_ACCESS /* This is drawn from a dump from vgacon:startup in standard Linux. */ @@ -1487,6 +1544,7 @@ void __init setup_arch(char **cmdline_p) /* * Parse the ACPI tables for possible boot-time SMP configuration. */ + acpi_boot_table_init(); acpi_boot_init(); #ifdef CONFIG_X86_LOCAL_APIC @@ -1498,7 +1556,7 @@ void __init setup_arch(char **cmdline_p) * conflicts. */ noirqdebug_setup(""); - register_memory(max_low_pfn); + register_memory(); /* If we are a privileged guest OS then we should request IO privs. 
*/ if (xen_start_info.flags & SIF_PRIVILEGED) { diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/signal.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/signal.c similarity index 95% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/signal.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/signal.c index cdd26f714e..9e17fc80e9 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/signal.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/signal.c @@ -190,6 +190,12 @@ restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc, int *peax if (verify_area(VERIFY_READ, buf, sizeof(*buf))) goto badframe; err |= restore_i387(buf); + } else { + struct task_struct *me = current; + if (used_math()) { + clear_fpu(me); + clear_used_math(); + } } } @@ -270,7 +276,6 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, struct pt_regs *regs, unsigned long mask) { int tmp, err = 0; - unsigned long eflags; tmp = 0; __asm__("movl %%gs,%0" : "=r"(tmp): "0"(tmp)); @@ -292,16 +297,7 @@ setup_sigcontext(struct sigcontext __user *sc, struct _fpstate __user *fpstate, err |= __put_user(current->thread.error_code, &sc->err); err |= __put_user(regs->eip, &sc->eip); err |= __put_user(regs->xcs, (unsigned int __user *)&sc->cs); - - /* - * Iff TF was set because the program is being single-stepped by a - * debugger, don't save that information on the signal stack.. We - * don't want debugging to change state. - */ - eflags = regs->eflags; - if (current->ptrace & PT_DTRACE) - eflags &= ~TF_MASK; - err |= __put_user(eflags, &sc->eflags); + err |= __put_user(regs->eflags, &sc->eflags); err |= __put_user(regs->esp, &sc->esp_at_signal); err |= __put_user(regs->xss, (unsigned int __user *)&sc->ss); @@ -369,20 +365,20 @@ static void setup_frame(int sig, struct k_sigaction *ka, ? 
current_thread_info()->exec_domain->signal_invmap[sig] : sig; - err |= __put_user(usig, &frame->sig); + err = __put_user(usig, &frame->sig); if (err) goto give_sigsegv; - err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); + err = setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]); if (err) goto give_sigsegv; if (_NSIG_WORDS > 1) { - err |= __copy_to_user(&frame->extramask, &set->sig[1], + err = __copy_to_user(&frame->extramask, &set->sig[1], sizeof(frame->extramask)); + if (err) + goto give_sigsegv; } - if (err) - goto give_sigsegv; restorer = &__kernel_sigreturn; if (ka->sa.sa_flags & SA_RESTORER) @@ -424,11 +420,9 @@ static void setup_frame(int sig, struct k_sigaction *ka, * The tracer may want to single-step inside the * handler too. */ - if (regs->eflags & TF_MASK) { - regs->eflags &= ~TF_MASK; - if (current->ptrace & PT_DTRACE) - ptrace_notify(SIGTRAP); - } + regs->eflags &= ~TF_MASK; + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); #if DEBUG_SIG printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", @@ -519,11 +513,9 @@ static void setup_rt_frame(int sig, struct k_sigaction *ka, siginfo_t *info, * The tracer may want to single-step inside the * handler too. 
*/ - if (regs->eflags & TF_MASK) { - regs->eflags &= ~TF_MASK; - if (current->ptrace & PT_DTRACE) - ptrace_notify(SIGTRAP); - } + regs->eflags &= ~TF_MASK; + if (test_thread_flag(TIF_SINGLESTEP)) + ptrace_notify(SIGTRAP); #if DEBUG_SIG printk("SIG deliver (%s:%d): sp=%p pc=%p ra=%p\n", diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/time.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c similarity index 99% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/time.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c index 47eabac205..5d09ae6a0a 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/time.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/time.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include @@ -79,9 +80,9 @@ unsigned long cpu_khz; /* Detected as we calibrate the TSC */ extern unsigned long wall_jiffies; -spinlock_t rtc_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(rtc_lock); -spinlock_t i8253_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(i8253_lock); EXPORT_SYMBOL(i8253_lock); extern struct init_timer_opts timer_tsc_init; @@ -562,12 +563,13 @@ static int timer_resume(struct sys_device *dev) hpet_reenable(); #endif sec = get_cmos_time() + clock_cmos_diff; - sleep_length = get_cmos_time() - sleep_start; + sleep_length = (get_cmos_time() - sleep_start) * HZ; write_seqlock_irqsave(&xtime_lock, flags); xtime.tv_sec = sec; xtime.tv_nsec = 0; write_sequnlock_irqrestore(&xtime_lock, flags); - jiffies += sleep_length * HZ; + jiffies += sleep_length; + wall_jiffies += sleep_length; return 0; } diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c 
b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/timers/timer_tsc.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c similarity index 95% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c index 1936d96c37..47396aa186 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/traps.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/traps.c @@ -90,7 +90,7 @@ asmlinkage void machine_check(void); static int kstack_depth_to_print = 24; struct notifier_block *i386die_chain; -static spinlock_t die_notifier_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(die_notifier_lock); int register_die_notifier(struct notifier_block *nb) { @@ -303,7 +303,7 @@ void die(const char * str, struct pt_regs * regs, long err) }; static int die_counter; - if (die.lock_owner != smp_processor_id()) { + if (die.lock_owner != _smp_processor_id()) { console_verbose(); spin_lock_irq(&die.lock); die.lock_owner = smp_processor_id(); @@ -543,7 +543,7 @@ static void unknown_nmi_error(unsigned char reason, struct pt_regs * regs) printk("Do you have a strange power saving mode enabled?\n"); } -static spinlock_t nmi_print_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(nmi_print_lock); void die_nmi (struct pt_regs *regs, const char *msg) { @@ -674,7 +674,6 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) { unsigned int condition; struct task_struct *tsk = current; - siginfo_t info; condition = HYPERVISOR_get_debugreg(6); @@ -699,36 +698,29 @@ fastcall void do_debug(struct pt_regs * regs, long error_code) /* Save debug status register where ptrace can see it */ tsk->thread.debugreg[6] = condition; - /* Mask out spurious TF errors due to lazy TF clearing */ + /* + * 
Single-stepping through TF: make sure we ignore any events in + * kernel space (but re-enable TF when returning to user mode). + * And if the event was due to a debugger (PT_DTRACE), clear the + * TF flag so that register information is correct. + */ if (condition & DR_STEP) { /* - * The TF error should be masked out only if the current - * process is not traced and if the TRAP flag has been set - * previously by a tracing process (condition detected by - * the PT_DTRACE flag); remember that the i386 TRAP flag - * can be modified by the process itself in user mode, - * allowing programs to debug themselves without the ptrace() - * interface. + * We already checked v86 mode above, so we can + * check for kernel mode by just checking the CPL + * of CS. */ if ((regs->xcs & 2) == 0) goto clear_TF_reenable; - if ((tsk->ptrace & (PT_DTRACE|PT_PTRACED)) == PT_DTRACE) - goto clear_TF; + + if (likely(tsk->ptrace & PT_DTRACE)) { + tsk->ptrace &= ~PT_DTRACE; + regs->eflags &= ~TF_MASK; + } } /* Ok, finally something we can handle */ - tsk->thread.trap_no = 1; - tsk->thread.error_code = error_code; - info.si_signo = SIGTRAP; - info.si_errno = 0; - info.si_code = TRAP_BRKPT; - - /* If this is a kernel mode trap, save the user PC on entry to - * the kernel, that's what the debugger can make sense of. - */ - info.si_addr = ((regs->xcs & 2) == 0) ? (void __user *)tsk->thread.eip - : (void __user *)regs->eip; - force_sig_info(SIGTRAP, &info, tsk); + send_sigtrap(tsk, regs, error_code); /* Disable additional traps. They'll be re-enabled when * the signal is delivered. 
@@ -743,7 +735,6 @@ debug_vm86: clear_TF_reenable: set_tsk_thread_flag(tsk, TIF_SINGLESTEP); -clear_TF: regs->eflags &= ~TF_MASK; return; } @@ -911,7 +902,7 @@ asmlinkage void math_state_restore(struct pt_regs regs) return; clts(); /* Allow maths ops (or we recurse) */ - if (!tsk->used_math) + if (!tsk_used_math(tsk)) init_fpu(tsk); restore_fpu(tsk); thread->status |= TS_USEDFPU; /* So we fnsave on switch_to() */ diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/vsyscall.S b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.S similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/vsyscall.S rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.S diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/kernel/vsyscall.lds b/linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.lds similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/kernel/vsyscall.lds rename to linux-2.6.11-xen-sparse/arch/xen/i386/kernel/vsyscall.lds diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/mm/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/i386/mm/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/fault.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c similarity index 97% rename from linux-2.6.10-xen-sparse/arch/xen/i386/mm/fault.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c index 9b5ba2b3d7..2d412dc366 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/fault.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/fault.c @@ -114,9 +114,7 @@ static inline unsigned long get_segment_eip(struct pt_regs *regs, } /* Decode the code segment base from the descriptor */ - base = (desc[0] >> 16) | - ((desc[1] & 0xff) << 16) | - (desc[1] & 0xff000000); + base = get_desc_base((unsigned long *)desc); if (seg & (1<<2)) { up(&current->mm->context.sem); @@ -527,6 +525,7 @@
vmalloc_fault: */ int index = pgd_index(address); pgd_t *pgd, *pgd_k; + pud_t *pud, *pud_k; pmd_t *pmd, *pmd_k; pte_t *pte_k; @@ -538,11 +537,17 @@ vmalloc_fault: /* * set_pgd(pgd, *pgd_k); here would be useless on PAE - * and redundant with the set_pmd() on non-PAE. + * and redundant with the set_pmd() on non-PAE. As would + * set_pud. */ - pmd = pmd_offset(pgd, address); - pmd_k = pmd_offset(pgd_k, address); + pud = pud_offset(pgd, address); + pud_k = pud_offset(pgd_k, address); + if (!pud_present(*pud_k)) + goto no_context; + + pmd = pmd_offset(pud, address); + pmd_k = pmd_offset(pud_k, address); if (!pmd_present(*pmd_k)) goto no_context; set_pmd(pmd, *pmd_k); diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/highmem.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c similarity index 96% rename from linux-2.6.10-xen-sparse/arch/xen/i386/mm/highmem.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c index 6b6fd16514..7f771f81fd 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/highmem.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/highmem.c @@ -3,7 +3,7 @@ void *kmap(struct page *page) { might_sleep(); - if (page < highmem_start_page) + if (!PageHighMem(page)) return page_address(page); return kmap_high(page); } @@ -12,7 +12,7 @@ void kunmap(struct page *page) { if (in_interrupt()) BUG(); - if (page < highmem_start_page) + if (!PageHighMem(page)) return; kunmap_high(page); } @@ -32,7 +32,7 @@ void *kmap_atomic(struct page *page, enum km_type type) /* even !CONFIG_PREEMPT needs this, for in_atomic in do_page_fault */ inc_preempt_count(); - if (page < highmem_start_page) + if (!PageHighMem(page)) return page_address(page); idx = type + KM_TYPE_NR*smp_processor_id(); diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/hypervisor.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c similarity index 96% rename from linux-2.6.10-xen-sparse/arch/xen/i386/mm/hypervisor.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c index 
81d0b8450c..6b19a2030a 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/hypervisor.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/hypervisor.c @@ -120,12 +120,12 @@ void queue_l1_entry_update(pte_t *ptr, unsigned long val) spin_unlock_irqrestore(&update_lock, flags); } -void queue_l2_entry_update(pmd_t *ptr, unsigned long val) +void queue_l2_entry_update(pmd_t *ptr, pmd_t val) { unsigned long flags; spin_lock_irqsave(&update_lock, flags); update_queue[idx].ptr = virt_to_machine(ptr); - update_queue[idx].val = val; + update_queue[idx].val = val.pud.pgd.pgd; /* XXX pmd_val_ma */ increment_index(); spin_unlock_irqrestore(&update_lock, flags); } @@ -237,12 +237,12 @@ void xen_l1_entry_update(pte_t *ptr, unsigned long val) spin_unlock_irqrestore(&update_lock, flags); } -void xen_l2_entry_update(pmd_t *ptr, unsigned long val) +void xen_l2_entry_update(pmd_t *ptr, pmd_t val) { unsigned long flags; spin_lock_irqsave(&update_lock, flags); update_queue[idx].ptr = virt_to_machine(ptr); - update_queue[idx].val = val; + update_queue[idx].val = val.pud.pgd.pgd; /* XXX pmd_val_ma */ increment_index_and_flush(); spin_unlock_irqrestore(&update_lock, flags); } @@ -348,6 +348,7 @@ void xen_machphys_update(unsigned long mfn, unsigned long pfn) unsigned long allocate_empty_lowmem_region(unsigned long pages) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; unsigned long *pfn_array; @@ -368,7 +369,8 @@ unsigned long allocate_empty_lowmem_region(unsigned long pages) for ( i = 0; i < (1<<order); i++ ) { pgd = pgd_offset_k( (vstart + (i*PAGE_SIZE))); - pmd = pmd_offset(pgd, (vstart + (i*PAGE_SIZE))); + pud = pud_offset(pgd, (vstart + (i*PAGE_SIZE))); + pmd = pmd_offset(pud, (vstart + (i*PAGE_SIZE))); pte = pte_offset_kernel(pmd, (vstart + (i*PAGE_SIZE))); pfn_array[i] = pte->pte_low >> PAGE_SHIFT; queue_l1_entry_update(pte, 0); diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c similarity index 97% rename from linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c index 08d4f0c0ce..6548508bae 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/init.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/init.c @@ -55,15 +55,18 @@ static int noinline
do_test_wp_bit(void); */ static pmd_t * __init one_md_table_init(pgd_t *pgd) { + pud_t *pud; pmd_t *pmd_table; #ifdef CONFIG_X86_PAE pmd_table = (pmd_t *) alloc_bootmem_low_pages(PAGE_SIZE); set_pgd(pgd, __pgd(__pa(pmd_table) | _PAGE_PRESENT)); - if (pmd_table != pmd_offset(pgd, 0)) + pud = pud_offset(pgd, 0); + if (pmd_table != pmd_offset(pud, 0)) BUG(); #else - pmd_table = pmd_offset(pgd, 0); + pud = pud_offset(pgd, 0); + pmd_table = pmd_offset(pud, 0); #endif return pmd_table; @@ -102,6 +105,7 @@ static pte_t * __init one_page_table_init(pmd_t *pmd) static void __init page_table_range_init (unsigned long start, unsigned long end, pgd_t *pgd_base) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; int pgd_idx, pmd_idx; unsigned long vaddr; @@ -114,8 +118,8 @@ static void __init page_table_range_init (unsigned long start, unsigned long end for ( ; (pgd_idx < PTRS_PER_PGD_NO_HV) && (vaddr != end); pgd++, pgd_idx++) { if (pgd_none(*pgd)) one_md_table_init(pgd); - - pmd = pmd_offset(pgd, vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end); pmd++, pmd_idx++) { if (pmd_none(*pmd)) one_page_table_init(pmd); @@ -128,7 +132,7 @@ static void __init page_table_range_init (unsigned long start, unsigned long end static inline int is_kernel_text(unsigned long addr) { - if (addr >= (unsigned long)_stext && addr <= (unsigned long)__init_end) + if (addr >= PAGE_OFFSET && addr <= (unsigned long)__init_end) return 1; return 0; } @@ -249,7 +253,7 @@ EXPORT_SYMBOL(kmap_prot); EXPORT_SYMBOL(kmap_pte); #define kmap_get_fixmap_pte(vaddr) \ - pte_offset_kernel(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + pte_offset_kernel(pmd_offset(pud_offset(pgd_offset_k(vaddr), vaddr), (vaddr)), (vaddr)) void __init kmap_init(void) { @@ -265,6 +269,7 @@ void __init kmap_init(void) void __init permanent_kmaps_init(pgd_t *pgd_base) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; unsigned long vaddr; @@ -273,7 +278,8 @@ void __init 
permanent_kmaps_init(pgd_t *pgd_base) page_table_range_init(vaddr, vaddr + PAGE_SIZE*LAST_PKMAP, pgd_base); pgd = swapper_pg_dir + pgd_index(vaddr); - pmd = pmd_offset(pgd, vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); pte = pte_offset_kernel(pmd, vaddr); pkmap_page_table = pte; } @@ -464,7 +470,7 @@ u64 __supported_pte_mask = ~_PAGE_NX; * on Enable * off Disable */ -static int __init noexec_setup(char *str) +void __init noexec_setup(const char *str) { if (!strncmp(str, "on",2) && cpu_has_nx) { __supported_pte_mask |= _PAGE_NX; @@ -473,11 +479,8 @@ static int __init noexec_setup(char *str) disable_nx = 1; __supported_pte_mask &= ~_PAGE_NX; } - return 1; } -__setup("noexec=", noexec_setup); - int nx_enabled = 0; #ifdef CONFIG_X86_PAE @@ -607,7 +610,6 @@ void __init test_wp_bit(void) static void __init set_max_mapnr_init(void) { #ifdef CONFIG_HIGHMEM - highmem_start_page = pfn_to_page(highstart_pfn); max_mapnr = num_physpages = highend_pfn; #else max_mapnr = num_physpages = max_low_pfn; diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/ioremap.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c similarity index 96% rename from linux-2.6.10-xen-sparse/arch/xen/i386/mm/ioremap.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c index 6d675b449a..e8d48949d3 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/ioremap.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/ioremap.c @@ -121,7 +121,7 @@ void __iomem * __ioremap(unsigned long phys_addr, unsigned long size, unsigned l /* * Ok, go for it.. 
*/ - area = get_vm_area(size, VM_IOREMAP); + area = get_vm_area(size, VM_IOREMAP | (flags << 20)); if (!area) return NULL; area->phys_addr = phys_addr; @@ -206,11 +206,12 @@ void iounmap(volatile void __iomem *addr) if (!p) { printk("__iounmap: bad address %p\n", addr); return; - } + } - if (p->flags && is_local_lowmem(p->phys_addr)) { + if ((p->flags >> 20) && is_local_lowmem(p->phys_addr)) { + /* p->size includes the guard page, but cpa doesn't like that */ change_page_attr(virt_to_page(bus_to_virt(p->phys_addr)), - p->size >> PAGE_SHIFT, + (p->size - PAGE_SIZE) >> PAGE_SHIFT, PAGE_KERNEL); global_flush_tlb(); } @@ -361,8 +362,14 @@ int __direct_remap_area_pages(struct mm_struct *mm, BUG(); spin_lock(&mm->page_table_lock); do { + pud_t *pud; + pmd_t *pmd; + error = -ENOMEM; - pmd_t *pmd = pmd_alloc(mm, dir, address); + pud = pud_alloc(mm, dir, address); + if (!pud) + break; + pmd = pmd_alloc(mm, dir, address); if (!pmd) break; error = 0; diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/pageattr.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c similarity index 84% rename from linux-2.6.10-xen-sparse/arch/xen/i386/mm/pageattr.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c index 7c961c7a7d..1b79c7e684 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/pageattr.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pageattr.c @@ -13,17 +13,21 @@ #include #include -static spinlock_t cpa_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(cpa_lock); static struct list_head df_list = LIST_HEAD_INIT(df_list); pte_t *lookup_address(unsigned long address) { - pgd_t *pgd = pgd_offset_k(address); + pgd_t *pgd = pgd_offset_k(address); + pud_t *pud; pmd_t *pmd; if (pgd_none(*pgd)) return NULL; - pmd = pmd_offset(pgd, address); + pud = pud_offset(pgd, address); + if (pud_none(*pud)) + return NULL; + pmd = pmd_offset(pud, address); if (pmd_none(*pmd)) return NULL; if (pmd_large(*pmd)) @@ -77,9 +81,11 @@ static void set_pmd_pte(pte_t *kpte, unsigned long 
address, pte_t pte) spin_lock_irqsave(&pgd_lock, flags); for (page = pgd_list; page; page = (struct page *)page->index) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pgd = (pgd_t *)page_address(page) + pgd_index(address); - pmd = pmd_offset(pgd, address); + pud = pud_offset(pgd, address); + pmd = pmd_offset(pud, address); set_pte_atomic((pte_t *)pmd, pte); } spin_unlock_irqrestore(&pgd_lock, flags); @@ -92,7 +98,7 @@ static void set_pmd_pte(pte_t *kpte, unsigned long address, pte_t pte) static inline void revert_page(struct page *kpte_page, unsigned long address) { pte_t *linear = (pte_t *) - pmd_offset(pgd_offset(&init_mm, address), address); + pmd_offset(pud_offset(pgd_offset_k(address), address), address); set_pmd_pte(linear, address, pfn_pte((__pa(address) & LARGE_PAGE_MASK) >> PAGE_SHIFT, PAGE_KERNEL_LARGE)); @@ -105,10 +111,7 @@ __change_page_attr(struct page *page, pgprot_t prot) unsigned long address; struct page *kpte_page; -#ifdef CONFIG_HIGHMEM - if (page >= highmem_start_page) - BUG(); -#endif + BUG_ON(PageHighMem(page)); address = (unsigned long)page_address(page); kpte = lookup_address(address); @@ -117,27 +120,35 @@ __change_page_attr(struct page *page, pgprot_t prot) kpte_page = virt_to_page(kpte); if (pgprot_val(prot) != pgprot_val(PAGE_KERNEL)) { if ((pte_val(*kpte) & _PAGE_PSE) == 0) { - pte_t old = *kpte; - pte_t standard = mk_pte(page, PAGE_KERNEL); set_pte_batched(kpte, mk_pte(page, prot)); - if (pte_same(old,standard)) - get_page(kpte_page); } else { struct page *split = split_large_page(address, prot); if (!split) return -ENOMEM; - get_page(kpte_page); set_pmd_pte(kpte,address,mk_pte(split, PAGE_KERNEL)); + kpte_page = split; } + get_page(kpte_page); } else if ((pte_val(*kpte) & _PAGE_PSE) == 0) { set_pte_batched(kpte, mk_pte(page, PAGE_KERNEL)); __put_page(kpte_page); - } + } else + BUG(); - if (cpu_has_pse && (page_count(kpte_page) == 1)) { - list_add(&kpte_page->lru, &df_list); - revert_page(kpte_page, address); - } + /* + * If the pte was 
reserved, it means it was created at boot + * time (not via split_large_page) and in turn we must not + * replace it with a largepage. + */ + if (!PageReserved(kpte_page)) { + /* memleak and potential failed 2M page regeneration */ + BUG_ON(!page_count(kpte_page)); + + if (cpu_has_pse && (page_count(kpte_page) == 1)) { + list_add(&kpte_page->lru, &df_list); + revert_page(kpte_page, address); + } + } return 0; } diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/pgtable.c b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c similarity index 90% rename from linux-2.6.10-xen-sparse/arch/xen/i386/mm/pgtable.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c index 0d8f833bd9..30e929ad51 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/mm/pgtable.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/mm/pgtable.c @@ -65,6 +65,7 @@ void show_mem(void) static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -73,7 +74,12 @@ static void set_pte_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) BUG(); return; } - pmd = pmd_offset(pgd, vaddr); + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) { + BUG(); + return; + } + pmd = pmd_offset(pud, vaddr); if (pmd_none(*pmd)) { BUG(); return; @@ -97,6 +103,7 @@ static void set_pte_pfn_ma(unsigned long vaddr, unsigned long pfn, pgprot_t flags) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; @@ -105,7 +112,12 @@ static void set_pte_pfn_ma(unsigned long vaddr, unsigned long pfn, BUG(); return; } - pmd = pmd_offset(pgd, vaddr); + pud = pud_offset(pgd, vaddr); + if (pud_none(*pud)) { + BUG(); + return; + } + pmd = pmd_offset(pud, vaddr); if (pmd_none(*pmd)) { BUG(); return; @@ -130,6 +142,7 @@ static void set_pte_pfn_ma(unsigned long vaddr, unsigned long pfn, void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; if (vaddr & (PMD_SIZE-1)) { /* vaddr is misaligned */ @@ -145,7 
+158,8 @@ void set_pmd_pfn(unsigned long vaddr, unsigned long pfn, pgprot_t flags) printk ("set_pmd_pfn: pgd_none\n"); return; /* BUG(); */ } - pmd = pmd_offset(pgd, vaddr); + pud = pud_offset(pgd, vaddr); + pmd = pmd_offset(pud, vaddr); set_pmd(pmd, pfn_pmd(pfn, flags)); /* * It's enough to flush this one mapping. @@ -178,9 +192,8 @@ void __set_fixmap_ma (enum fixed_addresses idx, unsigned long phys, pgprot_t fla pte_t *pte_alloc_one_kernel(struct mm_struct *mm, unsigned long address) { - pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT); + pte_t *pte = (pte_t *)__get_free_page(GFP_KERNEL|__GFP_REPEAT|__GFP_ZERO); if (pte) { - clear_page(pte); make_page_readonly(pte); xen_flush_page_update_queue(); } @@ -216,13 +229,11 @@ struct page *pte_alloc_one(struct mm_struct *mm, unsigned long address) #ifdef CONFIG_HIGHPTE struct page *pte; - pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT, 0); + pte = alloc_pages(GFP_KERNEL|__GFP_HIGHMEM|__GFP_REPEAT|__GFP_ZERO, 0); if (pte == NULL) return pte; - if (pte >= highmem_start_page) { - clear_highpage(pte); + if (pte >= highmem_start_page) return pte; - } /* not a highmem page -- free page and grab one from the cache */ __free_page(pte); #endif @@ -263,7 +274,7 @@ void pmd_ctor(void *pmd, kmem_cache_t *cache, unsigned long flags) * recommendations and having no core impact whatsoever. 
* -- wli */ -spinlock_t pgd_lock = SPIN_LOCK_UNLOCKED; +DEFINE_SPINLOCK(pgd_lock); struct page *pgd_list; static inline void pgd_list_add(pgd_t *pgd) @@ -357,14 +368,15 @@ void pgd_free(pgd_t *pgd) if (PTRS_PER_PMD > 1) for (i = 0; i < USER_PTRS_PER_PGD; ++i) kmem_cache_free(pmd_cache, (void *)__va(pgd_val(pgd[i])-1)); - /* in the non-PAE case, clear_page_tables() clears user pgd entries */ + /* in the non-PAE case, clear_page_range() clears user pgd entries */ kmem_cache_free(pgd_cache, pgd); } void make_lowmem_page_readonly(void *va) { pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pud_t *pud = pud_offset(pgd, (unsigned long)va); + pmd_t *pmd = pmd_offset(pud, (unsigned long)va); pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va); queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); } @@ -372,7 +384,8 @@ void make_lowmem_page_readonly(void *va) void make_lowmem_page_writable(void *va) { pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pud_t *pud = pud_offset(pgd, (unsigned long)va); + pmd_t *pmd = pmd_offset(pud, (unsigned long)va); pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va); queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); } @@ -380,7 +393,8 @@ void make_lowmem_page_writable(void *va) void make_page_readonly(void *va) { pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pud_t *pud = pud_offset(pgd, (unsigned long)va); + pmd_t *pmd = pmd_offset(pud, (unsigned long)va); pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va); queue_l1_entry_update(pte, (*(unsigned long *)pte)&~_PAGE_RW); if ( (unsigned long)va >= (unsigned long)high_memory ) @@ -397,7 +411,8 @@ void make_page_readonly(void *va) void make_page_writable(void *va) { pgd_t *pgd = pgd_offset_k((unsigned long)va); - pmd_t *pmd = pmd_offset(pgd, (unsigned long)va); + pud_t *pud = pud_offset(pgd, (unsigned long)va); + 
pmd_t *pmd = pmd_offset(pud, (unsigned long)va); pte_t *pte = pte_offset_kernel(pmd, (unsigned long)va); queue_l1_entry_update(pte, (*(unsigned long *)pte)|_PAGE_RW); if ( (unsigned long)va >= (unsigned long)high_memory ) diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/pci/Makefile b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/pci/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/i386/pci/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/pci/direct.c b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/i386/pci/direct.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/pci/direct.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/i386/pci/irq.c b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c similarity index 97% rename from linux-2.6.10-xen-sparse/arch/xen/i386/pci/irq.c rename to linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c index efd7e84040..3a5b7477a3 100644 --- a/linux-2.6.10-xen-sparse/arch/xen/i386/pci/irq.c +++ b/linux-2.6.11-xen-sparse/arch/xen/i386/pci/irq.c @@ -23,6 +23,8 @@ #include #include +static int pirq_enable_irq(struct pci_dev *dev); + /* * Never use: 0, 1, 2 (timer, keyboard, and cascade) * Avoid using: 13, 14 and 15 (FP error and IDE). 
@@ -88,7 +90,7 @@ void pcibios_penalize_isa_irq(int irq) pirq_penalize_isa_irq(irq); } -int pirq_enable_irq(struct pci_dev *dev) +static int pirq_enable_irq(struct pci_dev *dev) { int err; u8 pin; diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/Makefile b/linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/Makefile rename to linux-2.6.11-xen-sparse/arch/xen/kernel/Makefile diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/ctrl_if.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/ctrl_if.c rename to linux-2.6.11-xen-sparse/arch/xen/kernel/ctrl_if.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/devmem.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/devmem.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/devmem.c rename to linux-2.6.11-xen-sparse/arch/xen/kernel/devmem.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/evtchn.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/evtchn.c rename to linux-2.6.11-xen-sparse/arch/xen/kernel/evtchn.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/fixup.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/fixup.c rename to linux-2.6.11-xen-sparse/arch/xen/kernel/fixup.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/gnttab.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/gnttab.c rename to linux-2.6.11-xen-sparse/arch/xen/kernel/gnttab.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/reboot.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/reboot.c rename to linux-2.6.11-xen-sparse/arch/xen/kernel/reboot.c diff --git 
a/linux-2.6.10-xen-sparse/arch/xen/kernel/skbuff.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/skbuff.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/skbuff.c rename to linux-2.6.11-xen-sparse/arch/xen/kernel/skbuff.c diff --git a/linux-2.6.10-xen-sparse/arch/xen/kernel/xen_proc.c b/linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c similarity index 100% rename from linux-2.6.10-xen-sparse/arch/xen/kernel/xen_proc.c rename to linux-2.6.11-xen-sparse/arch/xen/kernel/xen_proc.c diff --git a/linux-2.6.10-xen-sparse/drivers/Makefile b/linux-2.6.11-xen-sparse/drivers/Makefile similarity index 94% rename from linux-2.6.10-xen-sparse/drivers/Makefile rename to linux-2.6.11-xen-sparse/drivers/Makefile index 6aa99acc8b..5fab89854a 100644 --- a/linux-2.6.10-xen-sparse/drivers/Makefile +++ b/linux-2.6.11-xen-sparse/drivers/Makefile @@ -43,6 +43,7 @@ obj-$(CONFIG_DIO) += dio/ obj-$(CONFIG_SBUS) += sbus/ obj-$(CONFIG_ZORRO) += zorro/ obj-$(CONFIG_MAC) += macintosh/ +obj-$(CONFIG_ATA_OVER_ETH) += block/aoe/ obj-$(CONFIG_PARIDE) += block/paride/ obj-$(CONFIG_TC) += tc/ obj-$(CONFIG_USB) += usb/ @@ -60,4 +61,6 @@ obj-$(CONFIG_MCA) += mca/ obj-$(CONFIG_EISA) += eisa/ obj-$(CONFIG_CPU_FREQ) += cpufreq/ obj-$(CONFIG_MMC) += mmc/ +obj-$(CONFIG_INFINIBAND) += infiniband/ obj-y += firmware/ +obj-$(CONFIG_CRYPTO) += crypto/ diff --git a/linux-2.6.10-xen-sparse/drivers/char/mem.c b/linux-2.6.11-xen-sparse/drivers/char/mem.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/char/mem.c rename to linux-2.6.11-xen-sparse/drivers/char/mem.c diff --git a/linux-2.6.10-xen-sparse/drivers/char/tty_io.c b/linux-2.6.11-xen-sparse/drivers/char/tty_io.c similarity index 98% rename from linux-2.6.10-xen-sparse/drivers/char/tty_io.c rename to linux-2.6.11-xen-sparse/drivers/char/tty_io.c index 2df07c8619..a8d33b5288 100644 --- a/linux-2.6.10-xen-sparse/drivers/char/tty_io.c +++ b/linux-2.6.11-xen-sparse/drivers/char/tty_io.c @@ -251,7 +251,7 @@ static void 
tty_set_termios_ldisc(struct tty_struct *tty, int num) * callers who will do ldisc lookups and cannot sleep. */ -static spinlock_t tty_ldisc_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(tty_ldisc_lock); static DECLARE_WAIT_QUEUE_HEAD(tty_ldisc_wait); static struct tty_ldisc tty_ldiscs[NR_LDISCS]; /* line disc dispatch table */ @@ -329,7 +329,7 @@ void tty_ldisc_put(int disc) EXPORT_SYMBOL_GPL(tty_ldisc_put); -void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) +static void tty_ldisc_assign(struct tty_struct *tty, struct tty_ldisc *ld) { tty->ldisc = *ld; tty->ldisc.refcount = 0; @@ -585,7 +585,7 @@ restart: /* * This routine returns a tty driver structure, given a device number */ -struct tty_driver *get_tty_driver(dev_t device, int *index) +static struct tty_driver *get_tty_driver(dev_t device, int *index) { struct tty_driver *p; @@ -692,7 +692,7 @@ static struct file_operations hung_up_tty_fops = { .release = tty_release, }; -static spinlock_t redirect_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(redirect_lock); static struct file *redirect; /** @@ -746,7 +746,7 @@ EXPORT_SYMBOL_GPL(tty_ldisc_flush); * but doesn't hold any locks, so we need to make sure we have the appropriate * locks for what we're doing.. 
*/ -void do_tty_hangup(void *data) +static void do_tty_hangup(void *data) { struct tty_struct *tty = (struct tty_struct *) data; struct file * cons_filp = NULL; @@ -920,9 +920,11 @@ void disassociate_ctty(int on_exit) lock_kernel(); + down(&tty_sem); tty = current->signal->tty; if (tty) { tty_pgrp = tty->pgrp; + up(&tty_sem); if (on_exit && tty->driver->type != TTY_DRIVER_TYPE_PTY) tty_vhangup(tty); } else { @@ -930,6 +932,7 @@ void disassociate_ctty(int on_exit) kill_pg(current->signal->tty_old_pgrp, SIGHUP, on_exit); kill_pg(current->signal->tty_old_pgrp, SIGCONT, on_exit); } + up(&tty_sem); unlock_kernel(); return; } @@ -939,15 +942,19 @@ void disassociate_ctty(int on_exit) kill_pg(tty_pgrp, SIGCONT, on_exit); } + /* Must lock changes to tty_old_pgrp */ + down(&tty_sem); current->signal->tty_old_pgrp = 0; tty->session = 0; tty->pgrp = -1; + /* Now clear signal->tty under the lock */ read_lock(&tasklist_lock); do_each_task_pid(current->signal->session, PIDTYPE_SID, p) { p->signal->tty = NULL; } while_each_task_pid(current->signal->session, PIDTYPE_SID, p); read_unlock(&tasklist_lock); + up(&tty_sem); unlock_kernel(); } @@ -1013,7 +1020,7 @@ static ssize_t tty_read(struct file * file, char __user * buf, size_t count, tty_ldisc_deref(ld); unlock_kernel(); if (i > 0) - inode->i_atime = CURRENT_TIME; + inode->i_atime = current_fs_time(inode->i_sb); return i; } @@ -1042,8 +1049,13 @@ static inline ssize_t do_tty_write( * * But if TTY_NO_WRITE_SPLIT is set, we should use a * big chunk-size.. + * + * The default chunk-size is 2kB, because the NTTY + * layer has problems with bigger chunks. It will + * claim to be able to handle more characters than + * it actually does. 
*/ - chunk = 4096; + chunk = 2048; if (test_bit(TTY_NO_WRITE_SPLIT, &tty->flags)) chunk = 65536; if (count < chunk) @@ -1090,7 +1102,8 @@ static inline ssize_t do_tty_write( cond_resched(); } if (written) { - file->f_dentry->d_inode->i_mtime = CURRENT_TIME; + struct inode *inode = file->f_dentry->d_inode; + inode->i_mtime = current_fs_time(inode->i_sb); ret = written; } up(&tty->atomic_write); @@ -1173,12 +1186,6 @@ static int init_dev(struct tty_driver *driver, int idx, struct termios *ltp, **ltp_loc, *o_ltp, **o_ltp_loc; int retval=0; - /* - * Check whether we need to acquire the tty semaphore to avoid - * race conditions. For now, play it safe. - */ - down(&tty_sem); - /* check whether we're reopening an existing tty */ if (driver->flags & TTY_DRIVER_DEVPTS_MEM) { tty = devpts_get_tty(idx); @@ -1367,7 +1374,6 @@ success: /* All paths come through here to release the semaphore */ end_init: - up(&tty_sem); return retval; /* Release locally allocated memory ... nothing placed in slots */ @@ -1563,9 +1569,14 @@ static void release_dev(struct file * filp) * each iteration we avoid any problems. */ while (1) { + /* Guard against races with tty->count changes elsewhere and + opens on /dev/tty */ + + down(&tty_sem); tty_closing = tty->count <= 1; o_tty_closing = o_tty && (o_tty->count <= (pty_master ? 1 : 0)); + up(&tty_sem); do_sleep = 0; if (tty_closing) { @@ -1601,6 +1612,8 @@ static void release_dev(struct file * filp) * both sides, and we've completed the last operation that could * block, so it's safe to proceed with closing. 
*/ + + down(&tty_sem); if (pty_master) { if (--o_tty->count < 0) { printk(KERN_WARNING "release_dev: bad pty slave count " @@ -1614,7 +1627,8 @@ static void release_dev(struct file * filp) tty->count, tty_name(tty, buf)); tty->count = 0; } - + up(&tty_sem); + /* * We've decremented tty->count, so we need to remove this file * descriptor off the tty->tty_files list; this serves two @@ -1761,10 +1775,14 @@ retry_open: noctty = filp->f_flags & O_NOCTTY; index = -1; retval = 0; + + down(&tty_sem); if (device == MKDEV(TTYAUX_MAJOR,0)) { - if (!current->signal->tty) + if (!current->signal->tty) { + up(&tty_sem); return -ENXIO; + } driver = current->signal->tty->driver; index = current->signal->tty->index; filp->f_flags |= O_NONBLOCK; /* Don't let /dev/tty block */ @@ -1789,14 +1807,18 @@ retry_open: noctty = 1; goto got_driver; } + up(&tty_sem); return -ENODEV; } driver = get_tty_driver(device, &index); - if (!driver) + if (!driver) { + up(&tty_sem); return -ENODEV; + } got_driver: retval = init_dev(driver, index, &tty); + up(&tty_sem); if (retval) return retval; @@ -1882,7 +1904,10 @@ static int ptmx_open(struct inode * inode, struct file * filp) } up(&allocated_ptys_lock); + down(&tty_sem); retval = init_dev(ptm_driver, index, &tty); + up(&tty_sem); + if (retval) goto out; @@ -2505,28 +2530,6 @@ out: tty_ldisc_deref(disc); } -/* - * Call the ldisc flush directly from a driver. This function may - * return an error and need retrying by the user. - */ - -int tty_push_data(struct tty_struct *tty, unsigned char *cp, unsigned char *fp, int count) -{ - int ret = 0; - struct tty_ldisc *disc; - - disc = tty_ldisc_ref(tty); - if(test_bit(TTY_DONT_FLIP, &tty->flags)) - ret = -EAGAIN; - else if(disc == NULL) - ret = -EIO; - else - disc->receive_buf(tty, cp, fp, count); - tty_ldisc_deref(disc); - return ret; - -} - /* * Routine which returns the baud rate of the tty * @@ -2905,8 +2908,8 @@ void __init console_init(void) So I haven't moved it. 
dwmw2 */ rs_360_init(); #endif - call = &__con_initcall_start; - while (call < &__con_initcall_end) { + call = __con_initcall_start; + while (call < __con_initcall_end) { (*call)(); call++; } diff --git a/linux-2.6.10-xen-sparse/drivers/xen/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/Makefile diff --git a/linux-2.6.10-xen-sparse/drivers/xen/balloon/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/balloon/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/balloon/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/balloon/Makefile diff --git a/linux-2.6.10-xen-sparse/drivers/xen/balloon/balloon.c b/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c similarity index 98% rename from linux-2.6.10-xen-sparse/drivers/xen/balloon/balloon.c rename to linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c index ad951fa984..ee725d3d9d 100644 --- a/linux-2.6.10-xen-sparse/drivers/xen/balloon/balloon.c +++ b/linux-2.6.11-xen-sparse/drivers/xen/balloon/balloon.c @@ -137,12 +137,16 @@ static struct page *balloon_retrieve(void) static inline pte_t *get_ptep(unsigned long addr) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pgd = pgd_offset_k(addr); if ( pgd_none(*pgd) || pgd_bad(*pgd) ) BUG(); - pmd = pmd_offset(pgd, addr); + pud = pud_offset(pgd, addr); + if ( pud_none(*pud) || pud_bad(*pud) ) BUG(); + + pmd = pmd_offset(pud, addr); if ( pmd_none(*pmd) || pmd_bad(*pmd) ) BUG(); return pte_offset_kernel(pmd, addr); diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkback/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/blkback/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkback/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/blkback/Makefile diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkback/blkback.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c similarity index 
100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkback/blkback.c rename to linux-2.6.11-xen-sparse/drivers/xen/blkback/blkback.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/blkback/common.h similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkback/common.h rename to linux-2.6.11-xen-sparse/drivers/xen/blkback/common.h diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkback/control.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/control.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkback/control.c rename to linux-2.6.11-xen-sparse/drivers/xen/blkback/control.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkback/interface.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/interface.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkback/interface.c rename to linux-2.6.11-xen-sparse/drivers/xen/blkback/interface.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkback/vbd.c b/linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkback/vbd.c rename to linux-2.6.11-xen-sparse/drivers/xen/blkback/vbd.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/Kconfig b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/Kconfig similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkfront/Kconfig rename to linux-2.6.11-xen-sparse/drivers/xen/blkfront/Kconfig diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkfront/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/blkfront/Makefile diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkfront/blkfront.c rename to 
linux-2.6.11-xen-sparse/drivers/xen/blkfront/blkfront.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/block.h b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkfront/block.h rename to linux-2.6.11-xen-sparse/drivers/xen/blkfront/block.h diff --git a/linux-2.6.10-xen-sparse/drivers/xen/blkfront/vbd.c b/linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/blkfront/vbd.c rename to linux-2.6.11-xen-sparse/drivers/xen/blkfront/vbd.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/console/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/console/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/console/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/console/Makefile diff --git a/linux-2.6.10-xen-sparse/drivers/xen/console/console.c b/linux-2.6.11-xen-sparse/drivers/xen/console/console.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/console/console.c rename to linux-2.6.11-xen-sparse/drivers/xen/console/console.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/evtchn/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/evtchn/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/evtchn/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/evtchn/Makefile diff --git a/linux-2.6.10-xen-sparse/drivers/xen/evtchn/evtchn.c b/linux-2.6.11-xen-sparse/drivers/xen/evtchn/evtchn.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/evtchn/evtchn.c rename to linux-2.6.11-xen-sparse/drivers/xen/evtchn/evtchn.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/netback/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/netback/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/netback/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/netback/Makefile diff --git 
a/linux-2.6.10-xen-sparse/drivers/xen/netback/common.h b/linux-2.6.11-xen-sparse/drivers/xen/netback/common.h similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/netback/common.h rename to linux-2.6.11-xen-sparse/drivers/xen/netback/common.h diff --git a/linux-2.6.10-xen-sparse/drivers/xen/netback/control.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/control.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/netback/control.c rename to linux-2.6.11-xen-sparse/drivers/xen/netback/control.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/netback/interface.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/interface.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/netback/interface.c rename to linux-2.6.11-xen-sparse/drivers/xen/netback/interface.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/netback/netback.c b/linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/netback/netback.c rename to linux-2.6.11-xen-sparse/drivers/xen/netback/netback.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/netfront/Kconfig b/linux-2.6.11-xen-sparse/drivers/xen/netfront/Kconfig similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/netfront/Kconfig rename to linux-2.6.11-xen-sparse/drivers/xen/netfront/Kconfig diff --git a/linux-2.6.10-xen-sparse/drivers/xen/netfront/Makefile b/linux-2.6.11-xen-sparse/drivers/xen/netfront/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/netfront/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/netfront/Makefile diff --git a/linux-2.6.10-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/netfront/netfront.c rename to linux-2.6.11-xen-sparse/drivers/xen/netfront/netfront.c diff --git a/linux-2.6.10-xen-sparse/drivers/xen/privcmd/Makefile 
b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/Makefile similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/privcmd/Makefile rename to linux-2.6.11-xen-sparse/drivers/xen/privcmd/Makefile diff --git a/linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c similarity index 100% rename from linux-2.6.10-xen-sparse/drivers/xen/privcmd/privcmd.c rename to linux-2.6.11-xen-sparse/drivers/xen/privcmd/privcmd.c diff --git a/linux-2.6.10-xen-sparse/include/asm-generic/pgtable.h b/linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-generic/pgtable.h rename to linux-2.6.11-xen-sparse/include/asm-generic/pgtable.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/desc.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h similarity index 94% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/desc.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h index 3cebc41697..abc3987d71 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/desc.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/desc.h @@ -128,6 +128,15 @@ static inline void load_LDT(mm_context_t *pc) put_cpu(); } +static inline unsigned long get_desc_base(unsigned long *desc) +{ + unsigned long base; + base = ((desc[0] >> 16) & 0x0000ffff) | + ((desc[1] << 16) & 0x00ff0000) | + (desc[1] & 0xff000000); + return base; +} + #endif /* !__ASSEMBLY__ */ #endif diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/dma-mapping.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/fixmap.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h similarity index 96% rename from 
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/fixmap.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h index cf12d309e3..f86762fd1e 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/fixmap.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/fixmap.h @@ -122,7 +122,9 @@ extern void __set_fixmap_ma (enum fixed_addresses idx, #define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) #define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) -#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +#define __FIXADDR_BOOT_SIZE (__end_of_fixed_addresses << PAGE_SHIFT) +#define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) +#define FIXADDR_BOOT_START (FIXADDR_TOP - __FIXADDR_BOOT_SIZE) #define __fix_to_virt(x) (FIXADDR_TOP - ((x) << PAGE_SHIFT)) #define __virt_to_fix(x) ((FIXADDR_TOP - ((x)&PAGE_MASK)) >> PAGE_SHIFT) diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/floppy.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/floppy.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/floppy.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/floppy.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/highmem.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h similarity index 85% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/highmem.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h index f608a13755..8273e2db97 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/highmem.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/highmem.h @@ -40,16 +40,27 @@ extern void kmap_init(void); * easily, subsequent pte tables have to be allocated in one physical * chunk of RAM. 
*/ -#if NR_CPUS <= 32 -#define PKMAP_BASE (HYPERVISOR_VIRT_START - (1<<23)) -#else -#define PKMAP_BASE (HYPERVISOR_VIRT_START - (1<<23) - 0x200000UL) -#endif #ifdef CONFIG_X86_PAE #define LAST_PKMAP 512 #else #define LAST_PKMAP 1024 #endif +/* + * Ordering is: + * + * FIXADDR_TOP + * fixed_addresses + * FIXADDR_START + * temp fixed addresses + * FIXADDR_BOOT_START + * Persistent kmap area + * PKMAP_BASE + * VMALLOC_END + * Vmalloc area + * VMALLOC_START + * high_memory + */ +#define PKMAP_BASE ( (FIXADDR_BOOT_START - PAGE_SIZE*(LAST_PKMAP + 1)) & PMD_MASK ) #define LAST_PKMAP_MASK (LAST_PKMAP-1) #define PKMAP_NR(virt) ((virt-PKMAP_BASE) >> PAGE_SHIFT) #define PKMAP_ADDR(nr) (PKMAP_BASE + ((nr) << PAGE_SHIFT)) diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/io.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/io.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/io.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/irq_vectors.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_post.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h rename to 
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mach-xen/setup_arch_pre.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mmu_context.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/mmu_context.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/mmu_context.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/msr.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/msr.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/msr.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/msr.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h similarity index 93% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h index da282bd498..4b5ed479e1 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/page.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/page.h @@ -41,6 +41,9 @@ #else +#define alloc_zeroed_user_highpage(vma, vaddr) alloc_page_vma(GFP_HIGHUSER | __GFP_ZERO, vma, vaddr) +#define __HAVE_ARCH_ALLOC_ZEROED_USER_HIGHPAGE + /* * On older X86 processors it's not a win to use MMX here it seems. * Maybe the K6-III ? 
@@ -81,11 +84,12 @@ typedef struct { unsigned long pte_low, pte_high; } pte_t; typedef struct { unsigned long long pmd; } pmd_t; typedef struct { unsigned long long pgd; } pgd_t; typedef struct { unsigned long long pgprot; } pgprot_t; +#define pmd_val(x) ((x).pmd) #define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32)) +#define __pmd(x) ((pmd_t) { (x) } ) #define HPAGE_SHIFT 21 #else typedef struct { unsigned long pte_low; } pte_t; -typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; typedef struct { unsigned long pgprot; } pgprot_t; #define boot_pte_t pte_t /* or would you rather have a typedef */ @@ -104,13 +108,12 @@ typedef struct { unsigned long pgprot; } pgprot_t; #endif -static inline unsigned long pmd_val(pmd_t x) +static inline unsigned long pgd_val(pgd_t x) { - unsigned long ret = x.pmd; + unsigned long ret = x.pgd; if (ret) ret = machine_to_phys(ret); return ret; } -#define pgd_val(x) ({ BUG(); (unsigned long)0; }) #define pgprot_val(x) ((x).pgprot) static inline pte_t __pte(unsigned long x) @@ -119,12 +122,11 @@ static inline pte_t __pte(unsigned long x) return ((pte_t) { (x) }); } #define __pte_ma(x) ((pte_t) { (x) } ) -static inline pmd_t __pmd(unsigned long x) +static inline pgd_t __pgd(unsigned long x) { if ((x & 1)) x = phys_to_machine(x); - return ((pmd_t) { (x) }); + return ((pgd_t) { (x) }); } -#define __pgd(x) ({ BUG(); (pgprot_t) { 0 }; }) #define __pgprot(x) ((pgprot_t) { (x) } ) #endif /* !__ASSEMBLY__ */ diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/param.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/param.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/param.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/param.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pci.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pci.h similarity index 100% rename from 
linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pci.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pci.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgalloc.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h similarity index 70% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgalloc.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h index e9bf5f50b5..520e98d072 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgalloc.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgalloc.h @@ -11,17 +11,15 @@ #define pmd_populate_kernel(mm, pmd, pte) \ set_pmd(pmd, __pmd(_PAGE_TABLE + __pa(pte))) -static inline void pmd_populate(struct mm_struct *mm, pmd_t *pmd, struct page *pte) -{ - set_pmd(pmd, __pmd(_PAGE_TABLE + - ((unsigned long long)page_to_pfn(pte) << - (unsigned long long) PAGE_SHIFT))); - flush_page_update_queue(); -} +#define pmd_populate(mm, pmd, pte) do { \ + set_pmd(pmd, __pmd(_PAGE_TABLE + \ + ((unsigned long long)page_to_pfn(pte) << \ + (unsigned long long) PAGE_SHIFT))); \ + flush_page_update_queue(); \ +} while (0) /* * Allocate and free page tables. */ - extern pgd_t *pgd_alloc(struct mm_struct *); extern void pgd_free(pgd_t *pgd); @@ -39,16 +37,15 @@ extern void pte_free(struct page *pte); #define __pte_free_tlb(tlb,pte) tlb_remove_page((tlb),(pte)) +#ifdef CONFIG_X86_PAE /* - * allocating and freeing a pmd is trivial: the 1-entry pmd is - * inside the pgd, so has no extra memory associated with it. - * (In the PAE case we free the pmds as part of the pgd.) + * In the PAE case we free the pmds as part of the pgd. 
*/ - #define pmd_alloc_one(mm, addr) ({ BUG(); ((pmd_t *)2); }) #define pmd_free(x) do { } while (0) #define __pmd_free_tlb(tlb,x) do { } while (0) -#define pgd_populate(mm, pmd, pte) BUG() +#define pud_populate(mm, pmd, pte) BUG() +#endif #define check_pgt_cache() do { } while (0) diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h similarity index 90% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h index 2afc4fba6f..2be63665a4 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level-defs.h @@ -13,8 +13,6 @@ * the i386 is two-level, so we don't really have any * PMD directory physically. */ -#define PMD_SHIFT 22 -#define PTRS_PER_PMD 1 #define PTRS_PER_PTE 1024 diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h similarity index 83% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h index 36a2420a29..ac4fc891e8 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable-2level.h @@ -1,23 +1,13 @@ #ifndef _I386_PGTABLE_2LEVEL_H #define _I386_PGTABLE_2LEVEL_H +#include + #define pte_ERROR(e) \ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, (e).pte_low) -#define pmd_ERROR(e) \ - printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) #define pgd_ERROR(e) \ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) -/* - * The "pgd_xxx()" functions here are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's 
folded - * into the pgd entry) - */ -static inline int pgd_none(pgd_t pgd) { return 0; } -static inline int pgd_bad(pgd_t pgd) { return 0; } -static inline int pgd_present(pgd_t pgd) { return 1; } -#define pgd_clear(xp) do { } while (0) - /* * Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following @@ -27,20 +17,7 @@ static inline int pgd_present(pgd_t pgd) { return 1; } queue_l1_entry_update(pteptr, (pteval).pte_low) #define set_pte(pteptr, pteval) (*(pteptr) = pteval) #define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval) -/* - * (pmds are folded into pgds so this doesn't get actually called, - * but the define is needed for a generic inline function.) - */ -#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval).pmd) -#define set_pgd(pgdptr, pgdval) ((void)0) - -#define pgd_page(pgd) \ -((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) - -static inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) dir; -} +#define set_pmd(pmdptr, pmdval) xen_l2_entry_update((pmdptr), (pmdval)) /* * A note on implementation of this atomic 'get-and-clear' operation. 
@@ -98,6 +75,11 @@ static inline pte_t ptep_get_and_clear(pte_t *xp) #define pfn_pte_ma(pfn, prot) __pte_ma(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) #define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot)) +#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) + +#define pmd_page_kernel(pmd) \ +((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) + /* * All present user pages are user-executable: */ diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h similarity index 97% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h index 3c0ae34e97..ddbb68e681 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/pgtable.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/pgtable.h @@ -54,12 +54,12 @@ void paging_init(void); */ #ifdef CONFIG_X86_PAE # include +# define PMD_SIZE (1UL << PMD_SHIFT) +# define PMD_MASK (~(PMD_SIZE-1)) #else # include #endif -#define PMD_SIZE (1UL << PMD_SHIFT) -#define PMD_MASK (~(PMD_SIZE-1)) #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) @@ -316,20 +316,13 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) #define page_pte(page) page_pte_prot(page, __pgprot(0)) -#define pmd_page_kernel(pmd) \ -((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) - #define pmd_clear(xp) do { \ set_pmd(xp, __pmd(0)); \ xen_flush_page_update_queue(); \ } while (0) -#ifndef CONFIG_DISCONTIGMEM -#define pmd_page(pmd) (pfn_to_page(pmd_val(pmd) >> PAGE_SHIFT)) -#endif /* !CONFIG_DISCONTIGMEM */ - #define pmd_large(pmd) \ - ((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT)) +((pmd_val(pmd) & (_PAGE_PSE|_PAGE_PRESENT)) == (_PAGE_PSE|_PAGE_PRESENT)) /* * the pgd page can be thought of an array like this: pgd_t[PTRS_PER_PGD] @@ -338,6 +331,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) * 
control the given virtual address */ #define pgd_index(address) (((address) >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) +#define pgd_index_k(addr) pgd_index(addr) /* * pgd_offset() returns a (pgd_t *) @@ -391,6 +385,8 @@ extern pte_t *lookup_address(unsigned long address); static inline int set_kernel_exec(unsigned long vaddr, int enable) { return 0;} #endif +extern void noexec_setup(const char *str); + #if defined(CONFIG_HIGHPTE) #define pte_offset_map(dir, address) \ ((pte_t *)kmap_atomic_pte(pmd_page(*(dir)),KM_PTE0) + \ @@ -463,7 +459,8 @@ void make_pages_writable(void *va, unsigned int nr); #define arbitrary_virt_to_machine(__va) \ ({ \ pgd_t *__pgd = pgd_offset_k((unsigned long)(__va)); \ - pmd_t *__pmd = pmd_offset(__pgd, (unsigned long)(__va)); \ + pud_t *__pud = pud_offset(__pgd, (unsigned long)(__va)); \ + pmd_t *__pmd = pmd_offset(__pud, (unsigned long)(__va)); \ pte_t *__pte = pte_offset_kernel(__pmd, (unsigned long)(__va)); \ unsigned long __pa = (*(unsigned long *)__pte) & PAGE_MASK; \ __pa | ((unsigned long)(__va) & (PAGE_SIZE-1)); \ diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/processor.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h similarity index 97% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/processor.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h index 53b71d3113..a6a5a9cf83 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/processor.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/processor.h @@ -65,6 +65,7 @@ struct cpuinfo_x86 { int f00f_bug; int coma_bug; unsigned long loops_per_jiffy; + unsigned char x86_num_cores; } __attribute__((__aligned__(SMP_CACHE_BYTES))); #define X86_VENDOR_INTEL 0 @@ -97,6 +98,7 @@ extern struct cpuinfo_x86 cpu_data[]; #define current_cpu_data boot_cpu_data #endif +extern int phys_proc_id[NR_CPUS]; extern char ignore_fpu_irq; extern void identify_cpu(struct cpuinfo_x86 *); @@ -104,6 +106,12 @@ extern void 
print_cpu_info(struct cpuinfo_x86 *); extern unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c); extern void dodgy_tsc(void); +#ifdef CONFIG_X86_HT +extern void detect_ht(struct cpuinfo_x86 *c); +#else +static inline void detect_ht(struct cpuinfo_x86 *c) {} +#endif + /* * EFLAGS bits */ @@ -127,6 +135,8 @@ extern void dodgy_tsc(void); /* * Generic CPUID function + * clear %ecx since some cpus (Cyrix MII) do not set or clear %ecx + * resulting in stale register contents being returned. */ static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) { @@ -135,7 +145,7 @@ static inline void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) "=b" (*ebx), "=c" (*ecx), "=d" (*edx) - : "0" (op)); + : "0" (op), "c"(0)); } /* @@ -270,11 +280,6 @@ static inline void clear_in_cr4 (unsigned long mask) outb((data), 0x23); \ } while (0) -/* - * Bus types (default is ISA, but people can check others with these..) - */ -extern int MCA_bus; - static inline void __monitor(const void *eax, unsigned long ecx, unsigned long edx) { @@ -299,6 +304,9 @@ extern unsigned int machine_submodel_id; extern unsigned int BIOS_revision; extern unsigned int mca_pentium_flag; +/* Boot loader type from the setup header */ +extern int bootloader_type; + /* * User space process size: 3GB (default). 
*/ diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/ptrace.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/ptrace.h similarity index 92% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/ptrace.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/ptrace.h index 2036837c50..aa2e6248a9 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/ptrace.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/ptrace.h @@ -55,6 +55,8 @@ struct pt_regs { #define PTRACE_SET_THREAD_AREA 26 #ifdef __KERNEL__ +struct task_struct; +extern void send_sigtrap(struct task_struct *tsk, struct pt_regs *regs, int error_code); #define user_mode(regs) ((VM_MASK & (regs)->eflags) || (2 & (regs)->xcs)) #define instruction_pointer(regs) ((regs)->eip) #if defined(CONFIG_SMP) && defined(CONFIG_FRAME_POINTER) diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/segment.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/segment.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/segment.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/setup.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/setup.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/setup.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/setup.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/synch_bitops.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h similarity index 99% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h rename to 
linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h index b8eb62d37a..b1badb3b67 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/system.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/system.h @@ -517,5 +517,6 @@ void disable_hlt(void); void enable_hlt(void); extern int es7000_plat; +void cpu_idle_wait(void); #endif diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/tlbflush.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/tlbflush.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/tlbflush.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/vga.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/vga.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/vga.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/vga.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h b/linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/xor.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/asm-i386/xor.h rename to linux-2.6.11-xen-sparse/include/asm-xen/asm-i386/xor.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/balloon.h b/linux-2.6.11-xen-sparse/include/asm-xen/balloon.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/balloon.h rename to linux-2.6.11-xen-sparse/include/asm-xen/balloon.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/ctrl_if.h b/linux-2.6.11-xen-sparse/include/asm-xen/ctrl_if.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/ctrl_if.h rename to linux-2.6.11-xen-sparse/include/asm-xen/ctrl_if.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/evtchn.h b/linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/evtchn.h rename to linux-2.6.11-xen-sparse/include/asm-xen/evtchn.h diff --git 
a/linux-2.6.10-xen-sparse/include/asm-xen/foreign_page.h b/linux-2.6.11-xen-sparse/include/asm-xen/foreign_page.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/foreign_page.h rename to linux-2.6.11-xen-sparse/include/asm-xen/foreign_page.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/gnttab.h b/linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/gnttab.h rename to linux-2.6.11-xen-sparse/include/asm-xen/gnttab.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/hypervisor.h b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h similarity index 98% rename from linux-2.6.10-xen-sparse/include/asm-xen/hypervisor.h rename to linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h index ef755a4e7d..9882429a96 100644 --- a/linux-2.6.10-xen-sparse/include/asm-xen/hypervisor.h +++ b/linux-2.6.11-xen-sparse/include/asm-xen/hypervisor.h @@ -38,6 +38,7 @@ #include #include #include +#include /* arch/xen/i386/kernel/setup.c */ union xen_start_info_union @@ -66,7 +67,7 @@ void lgdt_finish(void); extern unsigned int mmu_update_queue_idx; void queue_l1_entry_update(pte_t *ptr, unsigned long val); -void queue_l2_entry_update(pmd_t *ptr, unsigned long val); +void queue_l2_entry_update(pmd_t *ptr, pmd_t val); void queue_pt_switch(unsigned long ptr); void queue_tlb_flush(void); void queue_invlpg(unsigned long ptr); @@ -77,7 +78,7 @@ void queue_pte_unpin(unsigned long ptr); void queue_set_ldt(unsigned long ptr, unsigned long bytes); void queue_machphys_update(unsigned long mfn, unsigned long pfn); void xen_l1_entry_update(pte_t *ptr, unsigned long val); -void xen_l2_entry_update(pmd_t *ptr, unsigned long val); +void xen_l2_entry_update(pmd_t *ptr, pmd_t val); void xen_pt_switch(unsigned long ptr); void xen_tlb_flush(void); void xen_invlpg(unsigned long ptr); diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/linux-public/privcmd.h 
b/linux-2.6.11-xen-sparse/include/asm-xen/linux-public/privcmd.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/linux-public/privcmd.h rename to linux-2.6.11-xen-sparse/include/asm-xen/linux-public/privcmd.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/linux-public/suspend.h b/linux-2.6.11-xen-sparse/include/asm-xen/linux-public/suspend.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/linux-public/suspend.h rename to linux-2.6.11-xen-sparse/include/asm-xen/linux-public/suspend.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/multicall.h b/linux-2.6.11-xen-sparse/include/asm-xen/multicall.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/multicall.h rename to linux-2.6.11-xen-sparse/include/asm-xen/multicall.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/queues.h b/linux-2.6.11-xen-sparse/include/asm-xen/queues.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/queues.h rename to linux-2.6.11-xen-sparse/include/asm-xen/queues.h diff --git a/linux-2.6.10-xen-sparse/include/asm-xen/xen_proc.h b/linux-2.6.11-xen-sparse/include/asm-xen/xen_proc.h similarity index 100% rename from linux-2.6.10-xen-sparse/include/asm-xen/xen_proc.h rename to linux-2.6.11-xen-sparse/include/asm-xen/xen_proc.h diff --git a/linux-2.6.10-xen-sparse/include/linux/gfp.h b/linux-2.6.11-xen-sparse/include/linux/gfp.h similarity index 98% rename from linux-2.6.10-xen-sparse/include/linux/gfp.h rename to linux-2.6.11-xen-sparse/include/linux/gfp.h index 6b1642d379..42fe579514 100644 --- a/linux-2.6.10-xen-sparse/include/linux/gfp.h +++ b/linux-2.6.11-xen-sparse/include/linux/gfp.h @@ -37,6 +37,7 @@ struct vm_area_struct; #define __GFP_NORETRY 0x1000 /* Do not retry. 
Might fail */ #define __GFP_NO_GROW 0x2000 /* Slab internal usage */ #define __GFP_COMP 0x4000 /* Add compound page metadata */ +#define __GFP_ZERO 0x8000 /* Return zeroed page on success */ #define __GFP_BITS_SHIFT 16 /* Room for 16 __GFP_FOO bits */ #define __GFP_BITS_MASK ((1 << __GFP_BITS_SHIFT) - 1) diff --git a/linux-2.6.10-xen-sparse/include/linux/irq.h b/linux-2.6.11-xen-sparse/include/linux/irq.h similarity index 99% rename from linux-2.6.10-xen-sparse/include/linux/irq.h rename to linux-2.6.11-xen-sparse/include/linux/irq.h index 12584aa5b4..2e0aeb9d39 100644 --- a/linux-2.6.10-xen-sparse/include/linux/irq.h +++ b/linux-2.6.11-xen-sparse/include/linux/irq.h @@ -59,9 +59,10 @@ typedef struct hw_interrupt_type hw_irq_controller; * Pad this out to 32 bytes for cache and indexing reasons. */ typedef struct irq_desc { - unsigned int status; /* IRQ status */ hw_irq_controller *handler; + void *handler_data; struct irqaction *action; /* IRQ action list */ + unsigned int status; /* IRQ status */ unsigned int depth; /* nested irq disables */ unsigned int irq_count; /* For detecting broken interrupts */ unsigned int irqs_unhandled; diff --git a/linux-2.6.10-xen-sparse/kernel/irq/manage.c b/linux-2.6.11-xen-sparse/kernel/irq/manage.c similarity index 99% rename from linux-2.6.10-xen-sparse/kernel/irq/manage.c rename to linux-2.6.11-xen-sparse/kernel/irq/manage.c index fb7f365b3d..47678cdd25 100644 --- a/linux-2.6.10-xen-sparse/kernel/irq/manage.c +++ b/linux-2.6.11-xen-sparse/kernel/irq/manage.c @@ -15,6 +15,8 @@ #ifdef CONFIG_SMP +cpumask_t irq_affinity[NR_IRQS] = { [0 ... 
NR_IRQS-1] = CPU_MASK_ALL }; + /** * synchronize_irq - wait for pending IRQ handlers (on other CPUs) * diff --git a/linux-2.6.10-xen-sparse/mkbuildtree b/linux-2.6.11-xen-sparse/mkbuildtree similarity index 100% rename from linux-2.6.10-xen-sparse/mkbuildtree rename to linux-2.6.11-xen-sparse/mkbuildtree diff --git a/linux-2.6.10-xen-sparse/mm/memory.c b/linux-2.6.11-xen-sparse/mm/memory.c similarity index 66% rename from linux-2.6.10-xen-sparse/mm/memory.c rename to linux-2.6.11-xen-sparse/mm/memory.c index cd10d79d94..1ff6332578 100644 --- a/linux-2.6.10-xen-sparse/mm/memory.c +++ b/linux-2.6.11-xen-sparse/mm/memory.c @@ -34,6 +34,8 @@ * * 16.07.99 - Support of BIGMEM added by Gerhard Wichert, Siemens AG * (Gerhard.Wichert@pdb.siemens.de) + * + * Aug/Sep 2004 Changed to four level page tables (Andi Kleen) */ #include @@ -44,6 +46,7 @@ #include #include #include +#include #include #include @@ -74,84 +77,120 @@ unsigned long num_physpages; * and ZONE_HIGHMEM. */ void * high_memory; -struct page *highmem_start_page; unsigned long vmalloc_earlyreserve; EXPORT_SYMBOL(num_physpages); -EXPORT_SYMBOL(highmem_start_page); EXPORT_SYMBOL(high_memory); EXPORT_SYMBOL(vmalloc_earlyreserve); /* - * We special-case the C-O-W ZERO_PAGE, because it's such - * a common occurrence (no need to read the page to know - * that it's zero - better for the cache and memory subsystem). + * Note: this doesn't free the actual pages themselves. That + * has been handled earlier when unmapping all the memory regions. 
*/ -static inline void copy_cow_page(struct page * from, struct page * to, unsigned long address) +static inline void clear_pmd_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long start, unsigned long end) { - if (from == ZERO_PAGE(address)) { - clear_user_highpage(to, address); + struct page *page; + + if (pmd_none(*pmd)) return; + if (unlikely(pmd_bad(*pmd))) { + pmd_ERROR(*pmd); + pmd_clear(pmd); + return; + } + if (!((start | end) & ~PMD_MASK)) { + /* Only clear full, aligned ranges */ + page = pmd_page(*pmd); + pmd_clear(pmd); + dec_page_state(nr_page_table_pages); + tlb->mm->nr_ptes--; + pte_free_tlb(tlb, page); } - copy_user_highpage(to, from, address); } -/* - * Note: this doesn't free the actual pages themselves. That - * has been handled earlier when unmapping all the memory regions. - */ -static inline void free_one_pmd(struct mmu_gather *tlb, pmd_t * dir) +static inline void clear_pud_range(struct mmu_gather *tlb, pud_t *pud, unsigned long start, unsigned long end) { - struct page *page; + unsigned long addr = start, next; + pmd_t *pmd, *__pmd; - if (pmd_none(*dir)) + if (pud_none(*pud)) return; - if (unlikely(pmd_bad(*dir))) { - pmd_ERROR(*dir); - pmd_clear(dir); + if (unlikely(pud_bad(*pud))) { + pud_ERROR(*pud); + pud_clear(pud); return; } - page = pmd_page(*dir); - pmd_clear(dir); - dec_page_state(nr_page_table_pages); - tlb->mm->nr_ptes--; - pte_free_tlb(tlb, page); + + pmd = __pmd = pmd_offset(pud, start); + do { + next = (addr + PMD_SIZE) & PMD_MASK; + if (next > end || next <= addr) + next = end; + + clear_pmd_range(tlb, pmd, addr, next); + pmd++; + addr = next; + } while (addr && (addr < end)); + + if (!((start | end) & ~PUD_MASK)) { + /* Only clear full, aligned ranges */ + pud_clear(pud); + pmd_free_tlb(tlb, __pmd); + } } -static inline void free_one_pgd(struct mmu_gather *tlb, pgd_t * dir) + +static inline void clear_pgd_range(struct mmu_gather *tlb, pgd_t *pgd, unsigned long start, unsigned long end) { - int j; - pmd_t * pmd; + unsigned 
long addr = start, next; + pud_t *pud, *__pud; - if (pgd_none(*dir)) + if (pgd_none(*pgd)) return; - if (unlikely(pgd_bad(*dir))) { - pgd_ERROR(*dir); - pgd_clear(dir); + if (unlikely(pgd_bad(*pgd))) { + pgd_ERROR(*pgd); + pgd_clear(pgd); return; } - pmd = pmd_offset(dir, 0); - pgd_clear(dir); - for (j = 0; j < PTRS_PER_PMD ; j++) - free_one_pmd(tlb, pmd+j); - pmd_free_tlb(tlb, pmd); + + pud = __pud = pud_offset(pgd, start); + do { + next = (addr + PUD_SIZE) & PUD_MASK; + if (next > end || next <= addr) + next = end; + + clear_pud_range(tlb, pud, addr, next); + pud++; + addr = next; + } while (addr && (addr < end)); + + if (!((start | end) & ~PGDIR_MASK)) { + /* Only clear full, aligned ranges */ + pgd_clear(pgd); + pud_free_tlb(tlb, __pud); + } } /* - * This function clears all user-level page tables of a process - this - * is needed by execve(), so that old pages aren't in the way. + * This function clears user-level page tables of a process. * * Must be called with pagetable lock held. 
*/ -void clear_page_tables(struct mmu_gather *tlb, unsigned long first, int nr) +void clear_page_range(struct mmu_gather *tlb, unsigned long start, unsigned long end) { - pgd_t * page_dir = tlb->mm->pgd; - - page_dir += first; - do { - free_one_pgd(tlb, page_dir); - page_dir++; - } while (--nr); + unsigned long addr = start, next; + pgd_t * pgd = pgd_offset(tlb->mm, start); + unsigned long i; + + for (i = pgd_index(start); i <= pgd_index(end-1); i++) { + next = (addr + PGDIR_SIZE) & PGDIR_MASK; + if (next > end || next <= addr) + next = end; + + clear_pgd_range(tlb, pgd, addr, next); + pgd++; + addr = next; + } } pte_t fastcall * pte_alloc_map(struct mm_struct *mm, pmd_t *pmd, unsigned long address) @@ -204,165 +243,212 @@ pte_t fastcall * pte_alloc_kernel(struct mm_struct *mm, pmd_t *pmd, unsigned lon out: return pte_offset_kernel(pmd, address); } -#define PTE_TABLE_MASK ((PTRS_PER_PTE-1) * sizeof(pte_t)) -#define PMD_TABLE_MASK ((PTRS_PER_PMD-1) * sizeof(pmd_t)) /* * copy one vm_area from one task to the other. Assumes the page tables * already present in the new task to be cleared in the whole range * covered by this vma. * - * 08Jan98 Merged into one routine from several inline routines to reduce - * variable count and make things faster. -jj - * * dst->page_table_lock is held on entry and exit, - * but may be dropped within pmd_alloc() and pte_alloc_map(). + * but may be dropped within p[mg]d_alloc() and pte_alloc_map(). 
*/ -int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma) + +static inline void +copy_swap_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte_t pte) { - pgd_t * src_pgd, * dst_pgd; - unsigned long address = vma->vm_start; - unsigned long end = vma->vm_end; - unsigned long cow; + if (pte_file(pte)) + return; + swap_duplicate(pte_to_swp_entry(pte)); + if (list_empty(&dst_mm->mmlist)) { + spin_lock(&mmlist_lock); + list_add(&dst_mm->mmlist, &src_mm->mmlist); + spin_unlock(&mmlist_lock); + } +} - if (is_vm_hugetlb_page(vma)) - return copy_hugetlb_page_range(dst, src, vma); +static inline void +copy_one_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pte_t *dst_pte, pte_t *src_pte, unsigned long vm_flags, + unsigned long addr) +{ + pte_t pte = *src_pte; + struct page *page; + unsigned long pfn; - cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE; - src_pgd = pgd_offset(src, address)-1; - dst_pgd = pgd_offset(dst, address)-1; + /* pte contains position in swap, so copy. */ + if (!pte_present(pte)) { + copy_swap_pte(dst_mm, src_mm, pte); + set_pte(dst_pte, pte); + return; + } + pfn = pte_pfn(pte); + /* the pte points outside of valid memory, the + * mapping is assumed to be good, meaningful + * and not mapped via rmap - duplicate the + * mapping as is. 
+ */ + page = NULL; + if (pfn_valid(pfn)) + page = pfn_to_page(pfn); - for (;;) { - pmd_t * src_pmd, * dst_pmd; + if (!page || PageReserved(page)) { + set_pte(dst_pte, pte); + return; + } - src_pgd++; dst_pgd++; - - /* copy_pmd_range */ - - if (pgd_none(*src_pgd)) - goto skip_copy_pmd_range; - if (unlikely(pgd_bad(*src_pgd))) { - pgd_ERROR(*src_pgd); - pgd_clear(src_pgd); -skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; - if (!address || (address >= end)) - goto out; + /* + * If it's a COW mapping, write protect it both + * in the parent and the child + */ + if ((vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE) { + ptep_set_wrprotect(src_pte); + pte = *src_pte; + } + + /* + * If it's a shared mapping, mark it clean in + * the child + */ + if (vm_flags & VM_SHARED) + pte = pte_mkclean(pte); + pte = pte_mkold(pte); + get_page(page); + dst_mm->rss++; + if (PageAnon(page)) + dst_mm->anon_rss++; + set_pte(dst_pte, pte); + page_dup_rmap(page); +} + +static int copy_pte_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pmd_t *dst_pmd, pmd_t *src_pmd, struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + pte_t *src_pte, *dst_pte; + pte_t *s, *d; + unsigned long vm_flags = vma->vm_flags; + + d = dst_pte = pte_alloc_map(dst_mm, dst_pmd, addr); + if (!dst_pte) + return -ENOMEM; + + spin_lock(&src_mm->page_table_lock); + s = src_pte = pte_offset_map_nested(src_pmd, addr); + for (; addr < end; addr += PAGE_SIZE, s++, d++) { + if (pte_none(*s)) + continue; + copy_one_pte(dst_mm, src_mm, d, s, vm_flags, addr); + } + pte_unmap_nested(src_pte); + pte_unmap(dst_pte); + spin_unlock(&src_mm->page_table_lock); + cond_resched_lock(&dst_mm->page_table_lock); + return 0; +} + +static int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pud_t *dst_pud, pud_t *src_pud, struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + pmd_t *src_pmd, *dst_pmd; + int err = 0; + unsigned long next; + + src_pmd = 
pmd_offset(src_pud, addr); + dst_pmd = pmd_alloc(dst_mm, dst_pud, addr); + if (!dst_pmd) + return -ENOMEM; + + for (; addr < end; addr = next, src_pmd++, dst_pmd++) { + next = (addr + PMD_SIZE) & PMD_MASK; + if (next > end || next <= addr) + next = end; + if (pmd_none(*src_pmd)) + continue; + if (pmd_bad(*src_pmd)) { + pmd_ERROR(*src_pmd); + pmd_clear(src_pmd); continue; } + err = copy_pte_range(dst_mm, src_mm, dst_pmd, src_pmd, + vma, addr, next); + if (err) + break; + } + return err; +} - src_pmd = pmd_offset(src_pgd, address); - dst_pmd = pmd_alloc(dst, dst_pgd, address); - if (!dst_pmd) - goto nomem; +static int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pgd_t *dst_pgd, pgd_t *src_pgd, struct vm_area_struct *vma, + unsigned long addr, unsigned long end) +{ + pud_t *src_pud, *dst_pud; + int err = 0; + unsigned long next; + + src_pud = pud_offset(src_pgd, addr); + dst_pud = pud_alloc(dst_mm, dst_pgd, addr); + if (!dst_pud) + return -ENOMEM; + + for (; addr < end; addr = next, src_pud++, dst_pud++) { + next = (addr + PUD_SIZE) & PUD_MASK; + if (next > end || next <= addr) + next = end; + if (pud_none(*src_pud)) + continue; + if (pud_bad(*src_pud)) { + pud_ERROR(*src_pud); + pud_clear(src_pud); + continue; + } + err = copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, + vma, addr, next); + if (err) + break; + } + return err; +} - do { - pte_t * src_pte, * dst_pte; - - /* copy_pte_range */ - - if (pmd_none(*src_pmd)) - goto skip_copy_pte_range; - if (unlikely(pmd_bad(*src_pmd))) { - pmd_ERROR(*src_pmd); - pmd_clear(src_pmd); -skip_copy_pte_range: - address = (address + PMD_SIZE) & PMD_MASK; - if (address >= end) - goto out; - goto cont_copy_pmd_range; - } +int copy_page_range(struct mm_struct *dst, struct mm_struct *src, + struct vm_area_struct *vma) +{ + pgd_t *src_pgd, *dst_pgd; + unsigned long addr, start, end, next; + int err = 0; - dst_pte = pte_alloc_map(dst, dst_pmd, address); - if (!dst_pte) - goto nomem; - 
spin_lock(&src->page_table_lock); - src_pte = pte_offset_map_nested(src_pmd, address); - do { - pte_t pte = *src_pte; - struct page *page; - unsigned long pfn; - - /* copy_one_pte */ - - if (pte_none(pte)) - goto cont_copy_pte_range_noset; - /* pte contains position in swap, so copy. */ - if (!pte_present(pte)) { - if (!pte_file(pte)) { - swap_duplicate(pte_to_swp_entry(pte)); - if (list_empty(&dst->mmlist)) { - spin_lock(&mmlist_lock); - list_add(&dst->mmlist, - &src->mmlist); - spin_unlock(&mmlist_lock); - } - } - set_pte(dst_pte, pte); - goto cont_copy_pte_range_noset; - } - pfn = pte_pfn(pte); - /* the pte points outside of valid memory, the - * mapping is assumed to be good, meaningful - * and not mapped via rmap - duplicate the - * mapping as is. - */ - page = NULL; - if (pfn_valid(pfn)) - page = pfn_to_page(pfn); + if (is_vm_hugetlb_page(vma)) + return copy_hugetlb_page_range(dst, src, vma); - if (!page || PageReserved(page)) { - set_pte(dst_pte, pte); - goto cont_copy_pte_range_noset; - } + start = vma->vm_start; + src_pgd = pgd_offset(src, start); + dst_pgd = pgd_offset(dst, start); - /* - * If it's a COW mapping, write protect it both - * in the parent and the child - */ - if (cow) { - ptep_set_wrprotect(src_pte); - pte = *src_pte; - } + end = vma->vm_end; + addr = start; + while (addr && (addr < end-1)) { + next = (addr + PGDIR_SIZE) & PGDIR_MASK; + if (next > end || next <= addr) + next = end; + if (pgd_none(*src_pgd)) + goto next_pgd; + if (pgd_bad(*src_pgd)) { + pgd_ERROR(*src_pgd); + pgd_clear(src_pgd); + goto next_pgd; + } + err = copy_pud_range(dst, src, dst_pgd, src_pgd, + vma, addr, next); + if (err) + break; - /* - * If it's a shared mapping, mark it clean in - * the child - */ - if (vma->vm_flags & VM_SHARED) - pte = pte_mkclean(pte); - pte = pte_mkold(pte); - get_page(page); - dst->rss++; - if (PageAnon(page)) - dst->anon_rss++; - set_pte(dst_pte, pte); - page_dup_rmap(page); -cont_copy_pte_range_noset: - address += PAGE_SIZE; - if (address >= 
end) { - pte_unmap_nested(src_pte); - pte_unmap(dst_pte); - goto out_unlock; - } - src_pte++; - dst_pte++; - } while ((unsigned long)src_pte & PTE_TABLE_MASK); - pte_unmap_nested(src_pte-1); - pte_unmap(dst_pte-1); - spin_unlock(&src->page_table_lock); - cond_resched_lock(&dst->page_table_lock); -cont_copy_pmd_range: - src_pmd++; - dst_pmd++; - } while ((unsigned long)src_pmd & PMD_TABLE_MASK); +next_pgd: + src_pgd++; + dst_pgd++; + addr = next; } -out_unlock: - spin_unlock(&src->page_table_lock); -out: - return 0; -nomem: - return -ENOMEM; + + return err; } static void zap_pte_range(struct mmu_gather *tlb, @@ -449,23 +535,23 @@ static void zap_pte_range(struct mmu_gather *tlb, } static void zap_pmd_range(struct mmu_gather *tlb, - pgd_t * dir, unsigned long address, + pud_t *pud, unsigned long address, unsigned long size, struct zap_details *details) { pmd_t * pmd; unsigned long end; - if (pgd_none(*dir)) + if (pud_none(*pud)) return; - if (unlikely(pgd_bad(*dir))) { - pgd_ERROR(*dir); - pgd_clear(dir); + if (unlikely(pud_bad(*pud))) { + pud_ERROR(*pud); + pud_clear(pud); return; } - pmd = pmd_offset(dir, address); + pmd = pmd_offset(pud, address); end = address + size; - if (end > ((address + PGDIR_SIZE) & PGDIR_MASK)) - end = ((address + PGDIR_SIZE) & PGDIR_MASK); + if (end > ((address + PUD_SIZE) & PUD_MASK)) + end = ((address + PUD_SIZE) & PUD_MASK); do { zap_pte_range(tlb, pmd, address, end - address, details); address = (address + PMD_SIZE) & PMD_MASK; @@ -473,36 +559,54 @@ static void zap_pmd_range(struct mmu_gather *tlb, } while (address && (address < end)); } +static void zap_pud_range(struct mmu_gather *tlb, + pgd_t * pgd, unsigned long address, + unsigned long end, struct zap_details *details) +{ + pud_t * pud; + + if (pgd_none(*pgd)) + return; + if (unlikely(pgd_bad(*pgd))) { + pgd_ERROR(*pgd); + pgd_clear(pgd); + return; + } + pud = pud_offset(pgd, address); + do { + zap_pmd_range(tlb, pud, address, end - address, details); + address = (address + 
PUD_SIZE) & PUD_MASK; + pud++; + } while (address && (address < end)); +} + static void unmap_page_range(struct mmu_gather *tlb, struct vm_area_struct *vma, unsigned long address, unsigned long end, struct zap_details *details) { - pgd_t * dir; + unsigned long next; + pgd_t *pgd; + int i; BUG_ON(address >= end); - dir = pgd_offset(vma->vm_mm, address); + pgd = pgd_offset(vma->vm_mm, address); tlb_start_vma(tlb, vma); - do { - zap_pmd_range(tlb, dir, address, end - address, details); - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); + for (i = pgd_index(address); i <= pgd_index(end-1); i++) { + next = (address + PGDIR_SIZE) & PGDIR_MASK; + if (next <= address || next > end) + next = end; + zap_pud_range(tlb, pgd, address, next, details); + address = next; + pgd++; + } tlb_end_vma(tlb, vma); } -/* Dispose of an entire struct mmu_gather per rescheduling point */ -#if defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) -#define ZAP_BLOCK_SIZE (FREE_PTE_NR * PAGE_SIZE) -#endif - -/* For UP, 256 pages at a time gives nice low latency */ -#if !defined(CONFIG_SMP) && defined(CONFIG_PREEMPT) -#define ZAP_BLOCK_SIZE (256 * PAGE_SIZE) -#endif - +#ifdef CONFIG_PREEMPT +# define ZAP_BLOCK_SIZE (8 * PAGE_SIZE) +#else /* No preempt: go for improved straight-line efficiency */ -#if !defined(CONFIG_PREEMPT) -#define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE) +# define ZAP_BLOCK_SIZE (1024 * PAGE_SIZE) #endif /** @@ -541,7 +645,8 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, unsigned long tlb_start = 0; /* For tlb_finish_mmu */ int tlb_start_valid = 0; int ret = 0; - int atomic = details && details->atomic; + spinlock_t *i_mmap_lock = details? 
details->i_mmap_lock: NULL; + int fullmm = tlb_is_full_mm(*tlbp); for ( ; vma && vma->vm_start < end_addr; vma = vma->vm_next) { unsigned long start; @@ -579,16 +684,29 @@ int unmap_vmas(struct mmu_gather **tlbp, struct mm_struct *mm, zap_bytes -= block; if ((long)zap_bytes > 0) continue; - if (!atomic && need_resched()) { - int fullmm = tlb_is_full_mm(*tlbp); - tlb_finish_mmu(*tlbp, tlb_start, start); - cond_resched_lock(&mm->page_table_lock); - *tlbp = tlb_gather_mmu(mm, fullmm); - tlb_start_valid = 0; + + tlb_finish_mmu(*tlbp, tlb_start, start); + + if (need_resched() || + need_lockbreak(&mm->page_table_lock) || + (i_mmap_lock && need_lockbreak(i_mmap_lock))) { + if (i_mmap_lock) { + /* must reset count of rss freed */ + *tlbp = tlb_gather_mmu(mm, fullmm); + details->break_addr = start; + goto out; + } + spin_unlock(&mm->page_table_lock); + cond_resched(); + spin_lock(&mm->page_table_lock); } + + *tlbp = tlb_gather_mmu(mm, fullmm); + tlb_start_valid = 0; zap_bytes = ZAP_BLOCK_SIZE; } } +out: return ret; } @@ -617,6 +735,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, tlb = tlb_gather_mmu(mm, 0); unmap_vmas(&tlb, mm, vma, address, end, &nr_accounted, details); tlb_finish_mmu(tlb, address, end); + acct_update_integrals(); spin_unlock(&mm->page_table_lock); } @@ -624,10 +743,11 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long address, * Do a quick page-table lookup for a single page. * mm->page_table_lock must be held. 
*/ -struct page * -follow_page(struct mm_struct *mm, unsigned long address, int write) +static struct page * +__follow_page(struct mm_struct *mm, unsigned long address, int read, int write) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *ptep, pte; unsigned long pfn; @@ -641,13 +761,15 @@ follow_page(struct mm_struct *mm, unsigned long address, int write) if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) goto out; - pmd = pmd_offset(pgd, address); - if (pmd_none(*pmd)) + pud = pud_offset(pgd, address); + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + goto out; + + pmd = pmd_offset(pud, address); + if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) goto out; if (pmd_huge(*pmd)) return follow_huge_pmd(mm, address, pmd, write); - if (unlikely(pmd_bad(*pmd))) - goto out; ptep = pte_offset_map(pmd, address); if (!ptep) @@ -658,6 +780,8 @@ follow_page(struct mm_struct *mm, unsigned long address, int write) if (pte_present(pte)) { if (write && !pte_write(pte)) goto out; + if (read && !pte_read(pte)) + goto out; pfn = pte_pfn(pte); if (pfn_valid(pfn)) { page = pfn_to_page(pfn); @@ -672,6 +796,20 @@ out: return NULL; } +struct page * +follow_page(struct mm_struct *mm, unsigned long address, int write) +{ + return __follow_page(mm, address, /*read*/0, write); +} + +int +check_user_page_readable(struct mm_struct *mm, unsigned long address) +{ + return __follow_page(mm, address, /*read*/1, /*write*/0) != NULL; +} + +EXPORT_SYMBOL(check_user_page_readable); + /* * Given a physical address, is there a useful struct page pointing to * it? This may become more complex in the future if we start dealing @@ -691,6 +829,7 @@ untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma, unsigned long address) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; /* Check if the vma is for an anonymous mapping. 
*/ @@ -702,8 +841,12 @@ untouched_anonymous_page(struct mm_struct* mm, struct vm_area_struct *vma, if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) return 1; + pud = pud_offset(pgd, address); + if (pud_none(*pud) || unlikely(pud_bad(*pud))) + return 1; + /* Check if page middle directory entry exists. */ - pmd = pmd_offset(pgd, address); + pmd = pmd_offset(pud, address); if (pmd_none(*pmd) || unlikely(pmd_bad(*pmd))) return 1; @@ -735,6 +878,7 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, unsigned long pg = start & PAGE_MASK; struct vm_area_struct *gate_vma = get_gate_vma(tsk); pgd_t *pgd; + pud_t *pud; pmd_t *pmd; pte_t *pte; if (write) /* user gate pages are read-only */ @@ -744,7 +888,9 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, else pgd = pgd_offset_gate(mm, pg); BUG_ON(pgd_none(*pgd)); - pmd = pmd_offset(pgd, pg); + pud = pud_offset(pgd, pg); + BUG_ON(pud_none(*pud)); + pmd = pmd_offset(pud, pg); BUG_ON(pmd_none(*pmd)); pte = pte_offset_map(pmd, pg); BUG_ON(pte_none(*pte)); @@ -774,6 +920,8 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm, do { struct page *map; int lookup_write = write; + + cond_resched_lock(&mm->page_table_lock); while (!(map = follow_page(mm, start, lookup_write))) { /* * Shortcut for anonymous pages. 
We don't want @@ -857,16 +1005,16 @@ static void zeromap_pte_range(pte_t * pte, unsigned long address, } while (address && (address < end)); } -static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, - unsigned long size, pgprot_t prot) +static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, + unsigned long address, unsigned long size, pgprot_t prot) { unsigned long base, end; - base = address & PGDIR_MASK; - address &= ~PGDIR_MASK; + base = address & PUD_MASK; + address &= ~PUD_MASK; end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; + if (end > PUD_SIZE) + end = PUD_SIZE; do { pte_t * pte = pte_alloc_map(mm, pmd, base + address); if (!pte) @@ -879,31 +1027,65 @@ static inline int zeromap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned return 0; } -int zeromap_page_range(struct vm_area_struct *vma, unsigned long address, unsigned long size, pgprot_t prot) +static inline int zeromap_pud_range(struct mm_struct *mm, pud_t * pud, + unsigned long address, + unsigned long size, pgprot_t prot) +{ + unsigned long base, end; + int error = 0; + + base = address & PGDIR_MASK; + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + do { + pmd_t * pmd = pmd_alloc(mm, pud, base + address); + error = -ENOMEM; + if (!pmd) + break; + error = zeromap_pmd_range(mm, pmd, base + address, + end - address, prot); + if (error) + break; + address = (address + PUD_SIZE) & PUD_MASK; + pud++; + } while (address && (address < end)); + return 0; +} + +int zeromap_page_range(struct vm_area_struct *vma, unsigned long address, + unsigned long size, pgprot_t prot) { + int i; int error = 0; - pgd_t * dir; + pgd_t * pgd; unsigned long beg = address; unsigned long end = address + size; + unsigned long next; struct mm_struct *mm = vma->vm_mm; - dir = pgd_offset(mm, address); + pgd = pgd_offset(mm, address); flush_cache_range(vma, beg, end); - if (address >= end) - BUG(); + BUG_ON(address 
>= end); + BUG_ON(end > vma->vm_end); spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, address); + for (i = pgd_index(address); i <= pgd_index(end-1); i++) { + pud_t *pud = pud_alloc(mm, pgd, address); error = -ENOMEM; - if (!pmd) + if (!pud) break; - error = zeromap_pmd_range(mm, pmd, address, end - address, prot); + next = (address + PGDIR_SIZE) & PGDIR_MASK; + if (next <= beg || next > end) + next = end; + error = zeromap_pud_range(mm, pud, address, + next - address, prot); if (error) break; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (address && (address < end)); + address = next; + pgd++; + } /* * Why flush? zeromap_pte_range has a BUG_ON for !pte_none() */ @@ -917,8 +1099,9 @@ int zeromap_page_range(struct vm_area_struct *vma, unsigned long address, unsign * mappings are removed. any references to nonexistent pages results * in null mappings (currently treated as "copy-on-access") */ -static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned long size, - unsigned long pfn, pgprot_t prot) +static inline void +remap_pte_range(pte_t * pte, unsigned long address, unsigned long size, + unsigned long pfn, pgprot_t prot) { unsigned long end; @@ -936,22 +1119,24 @@ static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned } while (address && (address < end)); } -static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, unsigned long size, - unsigned long pfn, pgprot_t prot) +static inline int +remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned long address, + unsigned long size, unsigned long pfn, pgprot_t prot) { unsigned long base, end; - base = address & PGDIR_MASK; - address &= ~PGDIR_MASK; + base = address & PUD_MASK; + address &= ~PUD_MASK; end = address + size; - if (end > PGDIR_SIZE) - end = PGDIR_SIZE; - pfn -= address >> PAGE_SHIFT; + if (end > PUD_SIZE) + end = PUD_SIZE; + pfn -= (address >> PAGE_SHIFT); do { pte_t * pte = 
pte_alloc_map(mm, pmd, base + address); if (!pte) return -ENOMEM; - remap_pte_range(pte, base + address, end - address, pfn + (address >> PAGE_SHIFT), prot); + remap_pte_range(pte, base + address, end - address, + (address >> PAGE_SHIFT) + pfn, prot); pte_unmap(pte); address = (address + PMD_SIZE) & PMD_MASK; pmd++; @@ -959,20 +1144,50 @@ static inline int remap_pmd_range(struct mm_struct *mm, pmd_t * pmd, unsigned lo return 0; } +static inline int remap_pud_range(struct mm_struct *mm, pud_t * pud, + unsigned long address, unsigned long size, + unsigned long pfn, pgprot_t prot) +{ + unsigned long base, end; + int error; + + base = address & PGDIR_MASK; + address &= ~PGDIR_MASK; + end = address + size; + if (end > PGDIR_SIZE) + end = PGDIR_SIZE; + pfn -= address >> PAGE_SHIFT; + do { + pmd_t *pmd = pmd_alloc(mm, pud, base+address); + error = -ENOMEM; + if (!pmd) + break; + error = remap_pmd_range(mm, pmd, base + address, end - address, + (address >> PAGE_SHIFT) + pfn, prot); + if (error) + break; + address = (address + PUD_SIZE) & PUD_MASK; + pud++; + } while (address && (address < end)); + return error; +} + /* Note: this is only safe if the mm semaphore is held when called. */ -int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned long pfn, unsigned long size, pgprot_t prot) +int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, + unsigned long pfn, unsigned long size, pgprot_t prot) { int error = 0; - pgd_t * dir; + pgd_t *pgd; unsigned long beg = from; unsigned long end = from + size; + unsigned long next; struct mm_struct *mm = vma->vm_mm; + int i; pfn -= from >> PAGE_SHIFT; - dir = pgd_offset(mm, from); + pgd = pgd_offset(mm, from); flush_cache_range(vma, beg, end); - if (from >= end) - BUG(); + BUG_ON(from >= end); /* * Physically remapped pages are special. Tell the @@ -983,25 +1198,32 @@ int remap_pfn_range(struct vm_area_struct *vma, unsigned long from, unsigned lon * this region. 
*/ vma->vm_flags |= VM_IO | VM_RESERVED; + spin_lock(&mm->page_table_lock); - do { - pmd_t *pmd = pmd_alloc(mm, dir, from); + for (i = pgd_index(beg); i <= pgd_index(end-1); i++) { + pud_t *pud = pud_alloc(mm, pgd, from); error = -ENOMEM; - if (!pmd) + if (!pud) break; - error = remap_pmd_range(mm, pmd, from, end - from, pfn + (from >> PAGE_SHIFT), prot); + next = (from + PGDIR_SIZE) & PGDIR_MASK; + if (next > end || next <= from) + next = end; + error = remap_pud_range(mm, pud, from, end - from, + pfn + (from >> PAGE_SHIFT), prot); if (error) break; - from = (from + PGDIR_SIZE) & PGDIR_MASK; - dir++; - } while (from && (from < end)); + from = next; + pgd++; + } /* * Why flush? remap_pte_range has a BUG_ON for !pte_none() */ flush_tlb_range(vma, beg, end); spin_unlock(&mm->page_table_lock); + return error; } + EXPORT_SYMBOL(remap_pfn_range); /* @@ -1098,11 +1320,16 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, if (unlikely(anon_vma_prepare(vma))) goto no_new_page; - new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); - if (!new_page) - goto no_new_page; - copy_cow_page(old_page,new_page,address); - + if (old_page == ZERO_PAGE(address)) { + new_page = alloc_zeroed_user_highpage(vma, address); + if (!new_page) + goto no_new_page; + } else { + new_page = alloc_page_vma(GFP_HIGHUSER, vma, address); + if (!new_page) + goto no_new_page; + copy_user_highpage(new_page, old_page, address); + } /* * Re-check the pte - we dropped the lock */ @@ -1111,9 +1338,11 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct * vma, if (likely(pte_same(*page_table, pte))) { if (PageAnon(old_page)) mm->anon_rss--; - if (PageReserved(old_page)) + if (PageReserved(old_page)) { ++mm->rss; - else + acct_update_integrals(); + update_mem_hiwater(); + } else page_remove_rmap(old_page); break_cow(vma, new_page, address, page_table); lru_cache_add_active(new_page); @@ -1134,17 +1363,112 @@ no_new_page: } /* - * Helper function for 
unmap_mapping_range(). + * Helper functions for unmap_mapping_range(). + * + * __ Notes on dropping i_mmap_lock to reduce latency while unmapping __ + * + * We have to restart searching the prio_tree whenever we drop the lock, + * since the iterator is only valid while the lock is held, and anyway + * a later vma might be split and reinserted earlier while lock dropped. + * + * The list of nonlinear vmas could be handled more efficiently, using + * a placeholder, but handle it in the same way until a need is shown. + * It is important to search the prio_tree before nonlinear list: a vma + * may become nonlinear and be shifted from prio_tree to nonlinear list + * while the lock is dropped; but never shifted from list to prio_tree. + * + * In order to make forward progress despite restarting the search, + * vm_truncate_count is used to mark a vma as now dealt with, so we can + * quickly skip it next time around. Since the prio_tree search only + * shows us those vmas affected by unmapping the range in question, we + * can't efficiently keep all vmas in step with mapping->truncate_count: + * so instead reset them all whenever it wraps back to 0 (then go to 1). + * mapping->truncate_count and vma->vm_truncate_count are protected by + * i_mmap_lock. + * + * In order to make forward progress despite repeatedly restarting some + * large vma, note the break_addr set by unmap_vmas when it breaks out: + * and restart from that address when we reach that vma again. It might + * have been split or merged, shrunk or extended, but never shifted: so + * restart_addr remains valid so long as it remains in the vma's range. + * unmap_mapping_range forces truncate_count to leap over page-aligned + * values so we can save vma's restart_addr in its truncate_count field. 
*/ -static inline void unmap_mapping_range_list(struct prio_tree_root *root, +#define is_restart_addr(truncate_count) (!((truncate_count) & ~PAGE_MASK)) + +static void reset_vma_truncate_counts(struct address_space *mapping) +{ + struct vm_area_struct *vma; + struct prio_tree_iter iter; + + vma_prio_tree_foreach(vma, &iter, &mapping->i_mmap, 0, ULONG_MAX) + vma->vm_truncate_count = 0; + list_for_each_entry(vma, &mapping->i_mmap_nonlinear, shared.vm_set.list) + vma->vm_truncate_count = 0; +} + +static int unmap_mapping_range_vma(struct vm_area_struct *vma, + unsigned long start_addr, unsigned long end_addr, + struct zap_details *details) +{ + unsigned long restart_addr; + int need_break; + +again: + restart_addr = vma->vm_truncate_count; + if (is_restart_addr(restart_addr) && start_addr < restart_addr) { + start_addr = restart_addr; + if (start_addr >= end_addr) { + /* Top of vma has been split off since last time */ + vma->vm_truncate_count = details->truncate_count; + return 0; + } + } + + details->break_addr = end_addr; + zap_page_range(vma, start_addr, end_addr - start_addr, details); + + /* + * We cannot rely on the break test in unmap_vmas: + * on the one hand, we don't want to restart our loop + * just because that broke out for the page_table_lock; + * on the other hand, it does no test when vma is small. 
+ */ + need_break = need_resched() || + need_lockbreak(details->i_mmap_lock); + + if (details->break_addr >= end_addr) { + /* We have now completed this vma: mark it so */ + vma->vm_truncate_count = details->truncate_count; + if (!need_break) + return 0; + } else { + /* Note restart_addr in vma's truncate_count field */ + vma->vm_truncate_count = details->break_addr; + if (!need_break) + goto again; + } + + spin_unlock(details->i_mmap_lock); + cond_resched(); + spin_lock(details->i_mmap_lock); + return -EINTR; +} + +static inline void unmap_mapping_range_tree(struct prio_tree_root *root, struct zap_details *details) { struct vm_area_struct *vma; struct prio_tree_iter iter; pgoff_t vba, vea, zba, zea; +restart: vma_prio_tree_foreach(vma, &iter, root, details->first_index, details->last_index) { + /* Skip quickly over those we have already dealt with */ + if (vma->vm_truncate_count == details->truncate_count) + continue; + vba = vma->vm_pgoff; vea = vba + ((vma->vm_end - vma->vm_start) >> PAGE_SHIFT) - 1; /* Assume for now that PAGE_CACHE_SHIFT == PAGE_SHIFT */ @@ -1154,9 +1478,35 @@ static inline void unmap_mapping_range_list(struct prio_tree_root *root, zea = details->last_index; if (zea > vea) zea = vea; - zap_page_range(vma, + + if (unmap_mapping_range_vma(vma, ((zba - vba) << PAGE_SHIFT) + vma->vm_start, - (zea - zba + 1) << PAGE_SHIFT, details); + ((zea - vba + 1) << PAGE_SHIFT) + vma->vm_start, + details) < 0) + goto restart; + } +} + +static inline void unmap_mapping_range_list(struct list_head *head, + struct zap_details *details) +{ + struct vm_area_struct *vma; + + /* + * In nonlinear VMAs there is no correspondence between virtual address + * offset and file offset. So we must perform an exhaustive search + * across *all* the pages in each nonlinear VMA, not just the pages + * whose virtual address lies outside the file truncation point. 
+ */ +restart: + list_for_each_entry(vma, head, shared.vm_set.list) { + /* Skip quickly over those we have already dealt with */ + if (vma->vm_truncate_count == details->truncate_count) + continue; + details->nonlinear_vma = vma; + if (unmap_mapping_range_vma(vma, vma->vm_start, + vma->vm_end, details) < 0) + goto restart; } } @@ -1195,32 +1545,34 @@ void unmap_mapping_range(struct address_space *mapping, details.nonlinear_vma = NULL; details.first_index = hba; details.last_index = hba + hlen - 1; - details.atomic = 1; /* A spinlock is held */ if (details.last_index < details.first_index) details.last_index = ULONG_MAX; + details.i_mmap_lock = &mapping->i_mmap_lock; spin_lock(&mapping->i_mmap_lock); - /* Protect against page fault */ - atomic_inc(&mapping->truncate_count); - - if (unlikely(!prio_tree_empty(&mapping->i_mmap))) - unmap_mapping_range_list(&mapping->i_mmap, &details); + /* serialize i_size write against truncate_count write */ + smp_wmb(); + /* Protect against page faults, and endless unmapping loops */ + mapping->truncate_count++; /* - * In nonlinear VMAs there is no correspondence between virtual address - * offset and file offset. So we must perform an exhaustive search - * across *all* the pages in each nonlinear VMA, not just the pages - * whose virtual address lies outside the file truncation point. + * For archs where spin_lock has inclusive semantics like ia64 + * this smp_mb() will prevent to read pagetable contents + * before the truncate_count increment is visible to + * other cpus. 
*/ - if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) { - struct vm_area_struct *vma; - list_for_each_entry(vma, &mapping->i_mmap_nonlinear, - shared.vm_set.list) { - details.nonlinear_vma = vma; - zap_page_range(vma, vma->vm_start, - vma->vm_end - vma->vm_start, &details); - } + smp_mb(); + if (unlikely(is_restart_addr(mapping->truncate_count))) { + if (mapping->truncate_count == 0) + reset_vma_truncate_counts(mapping); + mapping->truncate_count++; } + details.truncate_count = mapping->truncate_count; + + if (unlikely(!prio_tree_empty(&mapping->i_mmap))) + unmap_mapping_range_tree(&mapping->i_mmap, &details); + if (unlikely(!list_empty(&mapping->i_mmap_nonlinear))) + unmap_mapping_range_list(&mapping->i_mmap_nonlinear, &details); spin_unlock(&mapping->i_mmap_lock); } EXPORT_SYMBOL(unmap_mapping_range); @@ -1395,6 +1747,9 @@ static int do_swap_page(struct mm_struct * mm, remove_exclusive_swap_page(page); mm->rss++; + acct_update_integrals(); + update_mem_hiwater(); + pte = mk_pte(page, vma->vm_page_prot); if (write_access && can_share_swap_page(page)) { pte = maybe_mkwrite(pte_mkdirty(pte), vma); @@ -1445,10 +1800,9 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, if (unlikely(anon_vma_prepare(vma))) goto no_mem; - page = alloc_page_vma(GFP_HIGHUSER, vma, addr); + page = alloc_zeroed_user_highpage(vma, addr); if (!page) goto no_mem; - clear_user_highpage(page, addr); spin_lock(&mm->page_table_lock); page_table = pte_offset_map(pmd, addr); @@ -1460,11 +1814,13 @@ do_anonymous_page(struct mm_struct *mm, struct vm_area_struct *vma, goto out; } mm->rss++; + acct_update_integrals(); + update_mem_hiwater(); entry = maybe_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)), vma); lru_cache_add_active(page); - mark_page_accessed(page); + SetPageReferenced(page); page_add_anon_rmap(page, vma, addr); } @@ -1499,7 +1855,7 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, struct page * new_page; struct address_space *mapping = NULL; 
pte_t entry; - int sequence = 0; + unsigned int sequence = 0; int ret = VM_FAULT_MINOR; int anon = 0; @@ -1511,11 +1867,19 @@ do_no_page(struct mm_struct *mm, struct vm_area_struct *vma, if (vma->vm_file) { mapping = vma->vm_file->f_mapping; - sequence = atomic_read(&mapping->truncate_count); + sequence = mapping->truncate_count; + smp_rmb(); /* serializes i_size against truncate_count */ } - smp_rmb(); /* Prevent CPU from reordering lock-free ->nopage() */ retry: + cond_resched(); new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, &ret); + /* + * No smp_rmb is needed here as long as there's a full + * spin_lock/unlock sequence inside the ->nopage callback + * (for the pagecache lookup) that acts as an implicit + * smp_mb() and prevents the i_size read to happen + * after the next truncate_count read. + */ /* no page was available -- either SIGBUS or OOM */ if (new_page == NOPAGE_SIGBUS) @@ -1546,9 +1910,8 @@ retry: * invalidated this page. If unmap_mapping_range got called, * retry getting the page. 
*/ - if (mapping && - (unlikely(sequence != atomic_read(&mapping->truncate_count)))) { - sequence = atomic_read(&mapping->truncate_count); + if (mapping && unlikely(sequence != mapping->truncate_count)) { + sequence = mapping->truncate_count; spin_unlock(&mm->page_table_lock); page_cache_release(new_page); goto retry; @@ -1569,6 +1932,9 @@ retry: if (pte_none(*page_table)) { if (!PageReserved(new_page)) ++mm->rss; + acct_update_integrals(); + update_mem_hiwater(); + flush_icache_page(vma, new_page); entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) @@ -1693,13 +2059,14 @@ static inline int handle_pte_fault(struct mm_struct *mm, * By the time we get here, we already hold the mm semaphore */ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, - unsigned long address, int write_access) + unsigned long address, int write_access) { pgd_t *pgd; + pud_t *pud; pmd_t *pmd; + pte_t *pte; __set_current_state(TASK_RUNNING); - pgd = pgd_offset(mm, address); inc_page_state(pgfault); @@ -1710,18 +2077,61 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, * We need the page table lock to synchronize with kswapd * and the SMP-safe atomic PTE updates. */ + pgd = pgd_offset(mm, address); spin_lock(&mm->page_table_lock); - pmd = pmd_alloc(mm, pgd, address); - if (pmd) { - pte_t * pte = pte_alloc_map(mm, pmd, address); - if (pte) - return handle_pte_fault(mm, vma, address, write_access, pte, pmd); - } + pud = pud_alloc(mm, pgd, address); + if (!pud) + goto oom; + + pmd = pmd_alloc(mm, pud, address); + if (!pmd) + goto oom; + + pte = pte_alloc_map(mm, pmd, address); + if (!pte) + goto oom; + + return handle_pte_fault(mm, vma, address, write_access, pte, pmd); + + oom: spin_unlock(&mm->page_table_lock); return VM_FAULT_OOM; } +#ifndef __ARCH_HAS_4LEVEL_HACK +/* + * Allocate page upper directory. + * + * We've already handled the fast-path in-line, and we own the + * page table lock. 
+ * + * On a two-level or three-level page table, this ends up actually being + * entirely optimized away. + */ +pud_t fastcall *__pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +{ + pud_t *new; + + spin_unlock(&mm->page_table_lock); + new = pud_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. + */ + if (pgd_present(*pgd)) { + pud_free(new); + goto out; + } + pgd_populate(mm, pgd, new); + out: + return pud_offset(pgd, address); +} + /* * Allocate page middle directory. * @@ -1731,7 +2141,7 @@ int handle_mm_fault(struct mm_struct *mm, struct vm_area_struct * vma, * On a two-level page table, this ends up actually being entirely * optimized away. */ -pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) +pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { pmd_t *new; @@ -1745,14 +2155,38 @@ pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long addr * Because we dropped the lock, we should re-check the * entry, as somebody else could have populated it.. */ - if (pgd_present(*pgd)) { + if (pud_present(*pud)) { pmd_free(new); goto out; } - pgd_populate(mm, pgd, new); + pud_populate(mm, pud, new); + out: + return pmd_offset(pud, address); +} +#else +pmd_t fastcall *__pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) +{ + pmd_t *new; + + spin_unlock(&mm->page_table_lock); + new = pmd_alloc_one(mm, address); + spin_lock(&mm->page_table_lock); + if (!new) + return NULL; + + /* + * Because we dropped the lock, we should re-check the + * entry, as somebody else could have populated it.. 
+ */ + if (pgd_present(*pud)) { + pmd_free(new); + goto out; + } + pgd_populate(mm, pud, new); out: - return pmd_offset(pgd, address); + return pmd_offset(pud, address); } +#endif int make_pages_present(unsigned long addr, unsigned long end) { @@ -1783,17 +2217,21 @@ struct page * vmalloc_to_page(void * vmalloc_addr) unsigned long addr = (unsigned long) vmalloc_addr; struct page *page = NULL; pgd_t *pgd = pgd_offset_k(addr); + pud_t *pud; pmd_t *pmd; pte_t *ptep, pte; if (!pgd_none(*pgd)) { - pmd = pmd_offset(pgd, addr); - if (!pmd_none(*pmd)) { - ptep = pte_offset_map(pmd, addr); - pte = *ptep; - if (pte_present(pte)) - page = pte_page(pte); - pte_unmap(ptep); + pud = pud_offset(pgd, addr); + if (!pud_none(*pud)) { + pmd = pmd_offset(pud, addr); + if (!pmd_none(*pmd)) { + ptep = pte_offset_map(pmd, addr); + pte = *ptep; + if (pte_present(pte)) + page = pte_page(pte); + pte_unmap(ptep); + } } } return page; @@ -1811,7 +2249,23 @@ unsigned long vmalloc_to_pfn(void * vmalloc_addr) EXPORT_SYMBOL(vmalloc_to_pfn); -#if !defined(CONFIG_ARCH_GATE_AREA) +/* + * update_mem_hiwater + * - update per process rss and vm high water data + */ +void update_mem_hiwater(void) +{ + struct task_struct *tsk = current; + + if (tsk->mm) { + if (tsk->mm->hiwater_rss < tsk->mm->rss) + tsk->mm->hiwater_rss = tsk->mm->rss; + if (tsk->mm->hiwater_vm < tsk->mm->total_vm) + tsk->mm->hiwater_vm = tsk->mm->total_vm; + } +} + +#if !defined(__HAVE_ARCH_GATE_AREA) #if defined(AT_SYSINFO_EHDR) struct vm_area_struct gate_vma; @@ -1837,7 +2291,7 @@ struct vm_area_struct *get_gate_vma(struct task_struct *tsk) #endif } -int in_gate_area(struct task_struct *task, unsigned long addr) +int in_gate_area_no_task(unsigned long addr) { #ifdef AT_SYSINFO_EHDR if ((addr >= FIXADDR_USER_START) && (addr < FIXADDR_USER_END)) @@ -1846,4 +2300,4 @@ int in_gate_area(struct task_struct *task, unsigned long addr) return 0; } -#endif +#endif /* __HAVE_ARCH_GATE_AREA */ diff --git a/linux-2.6.10-xen-sparse/mm/page_alloc.c 
b/linux-2.6.11-xen-sparse/mm/page_alloc.c similarity index 80% rename from linux-2.6.10-xen-sparse/mm/page_alloc.c rename to linux-2.6.11-xen-sparse/mm/page_alloc.c index 6676de084c..4d55438fc3 100644 --- a/linux-2.6.10-xen-sparse/mm/page_alloc.c +++ b/linux-2.6.11-xen-sparse/mm/page_alloc.c @@ -32,17 +32,27 @@ #include #include #include +#include #include +#include "internal.h" -nodemask_t node_online_map = NODE_MASK_NONE; +/* MCD - HACK: Find somewhere to initialize this EARLY, or make this initializer cleaner */ +nodemask_t node_online_map = { { [0] = 1UL } }; nodemask_t node_possible_map = NODE_MASK_ALL; struct pglist_data *pgdat_list; unsigned long totalram_pages; unsigned long totalhigh_pages; long nr_swap_pages; -int numnodes = 1; -int sysctl_lower_zone_protection = 0; +/* + * results with 256, 32 in the lowmem_reserve sysctl: + * 1G machine -> (16M dma, 800M-16M normal, 1G-800M high) + * 1G machine -> (16M dma, 784M normal, 224M high) + * NORMAL allocation will leave 784M/256 of ram reserved in the ZONE_DMA + * HIGHMEM allocation will leave 224M/32 of ram reserved in ZONE_NORMAL + * HIGHMEM allocation will (224M+784M)/256 of ram reserved in ZONE_DMA + */ +int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES-1] = { 256, 32 }; EXPORT_SYMBOL(totalram_pages); EXPORT_SYMBOL(nr_swap_pages); @@ -69,6 +79,10 @@ static int bad_range(struct zone *zone, struct page *page) return 1; if (page_to_pfn(page) < zone->zone_start_pfn) return 1; +#ifdef CONFIG_HOLES_IN_ZONE + if (!pfn_valid(page_to_pfn(page))) + return 1; +#endif if (zone != page_zone(page)) return 1; return 0; @@ -156,6 +170,45 @@ static void destroy_compound_page(struct page *page, unsigned long order) } #endif /* CONFIG_HUGETLB_PAGE */ +/* + * function for dealing with page's order in buddy system. + * zone->lock is already acquired when we use these. + * So, we don't need atomic page->flags operations here. 
+ */ +static inline unsigned long page_order(struct page *page) { + return page->private; +} + +static inline void set_page_order(struct page *page, int order) { + page->private = order; + __SetPagePrivate(page); +} + +static inline void rmv_page_order(struct page *page) +{ + __ClearPagePrivate(page); + page->private = 0; +} + +/* + * This function checks whether a page is free && is the buddy + * we can do coalesce a page and its buddy if + * (a) the buddy is free && + * (b) the buddy is on the buddy system && + * (c) a page and its buddy have the same order. + * for recording page's order, we use page->private and PG_private. + * + */ +static inline int page_is_buddy(struct page *page, int order) +{ + if (PagePrivate(page) && + (page_order(page) == order) && + !PageReserved(page) && + page_count(page) == 0) + return 1; + return 0; +} + /* * Freeing function for a buddy system allocator. * @@ -168,9 +221,10 @@ static void destroy_compound_page(struct page *page, unsigned long order) * at the bottom level available, and propagating the changes upward * as necessary, plus some accounting needed to play nicely with other * parts of the VM system. - * At each level, we keep one bit for each pair of blocks, which - * is set to 1 iff only one of the pair is allocated. So when we - * are allocating or freeing one, we can derive the state of the + * At each level, we keep a list of pages, which are heads of continuous + * free pages of length of (1 << order) and marked with PG_Private.Page's + * order is recorded in page->private field. + * So when we are allocating or freeing one, we can derive the state of the * other. That is, if we allocate a small block, and both were * free, the remainder of the region must be split into blocks. 
* If a block is freed, and its buddy is also free, then this @@ -180,42 +234,44 @@ static void destroy_compound_page(struct page *page, unsigned long order) */ static inline void __free_pages_bulk (struct page *page, struct page *base, - struct zone *zone, struct free_area *area, unsigned int order) + struct zone *zone, unsigned int order) { - unsigned long page_idx, index, mask; + unsigned long page_idx; + struct page *coalesced; + int order_size = 1 << order; - if (order) + if (unlikely(order)) destroy_compound_page(page, order); - mask = (~0UL) << order; + page_idx = page - base; - if (page_idx & ~mask) - BUG(); - index = page_idx >> (1 + order); - zone->free_pages += 1 << order; + BUG_ON(page_idx & (order_size - 1)); + BUG_ON(bad_range(zone, page)); + + zone->free_pages += order_size; while (order < MAX_ORDER-1) { - struct page *buddy1, *buddy2; + struct free_area *area; + struct page *buddy; + int buddy_idx; - BUG_ON(area >= zone->free_area + MAX_ORDER); - if (!__test_and_change_bit(index, area->map)) - /* - * the buddy page is still allocated. - */ + buddy_idx = (page_idx ^ (1 << order)); + buddy = base + buddy_idx; + if (bad_range(zone, buddy)) + break; + if (!page_is_buddy(buddy, order)) break; - /* Move the buddy up one level. 
*/ - buddy1 = base + (page_idx ^ (1 << order)); - buddy2 = base + page_idx; - BUG_ON(bad_range(zone, buddy1)); - BUG_ON(bad_range(zone, buddy2)); - list_del(&buddy1->lru); - mask <<= 1; + list_del(&buddy->lru); + area = zone->free_area + order; + area->nr_free--; + rmv_page_order(buddy); + page_idx &= buddy_idx; order++; - area++; - index >>= 1; - page_idx &= mask; } - list_add(&(base + page_idx)->lru, &area->free_list); + coalesced = base + page_idx; + set_page_order(coalesced, order); + list_add(&coalesced->lru, &zone->free_area[order].free_list); + zone->free_area[order].nr_free++; } static inline void free_pages_check(const char *function, struct page *page) @@ -253,12 +309,10 @@ free_pages_bulk(struct zone *zone, int count, struct list_head *list, unsigned int order) { unsigned long flags; - struct free_area *area; struct page *base, *page = NULL; int ret = 0; base = zone->zone_mem_map; - area = zone->free_area + order; spin_lock_irqsave(&zone->lock, flags); zone->all_unreclaimable = 0; zone->pages_scanned = 0; @@ -266,7 +320,7 @@ free_pages_bulk(struct zone *zone, int count, page = list_entry(list->prev, struct page, lru); /* have to delete it as __free_pages_bulk list manipulates */ list_del(&page->lru); - __free_pages_bulk(page, base, zone, area, order); + __free_pages_bulk(page, base, zone, order); ret++; } spin_unlock_irqrestore(&zone->lock, flags); @@ -282,6 +336,13 @@ void __free_pages_ok(struct page *page, unsigned int order) return; mod_page_state(pgfree, 1 << order); + +#ifndef CONFIG_MMU + if (order > 0) + for (i = 1 ; i < (1 << order) ; ++i) + __put_page(page + i); +#endif + for (i = 0 ; i < (1 << order) ; ++i) free_pages_check(__FUNCTION__, page + i); list_add(&page->lru, &list); @@ -289,8 +350,6 @@ void __free_pages_ok(struct page *page, unsigned int order) free_pages_bulk(page_zone(page), 1, &list, order); } -#define MARK_USED(index, order, area) \ - __change_bit((index) >> (1+(order)), (area)->map) /* * The order of subdivision here is critical 
for the IO subsystem. @@ -308,7 +367,7 @@ void __free_pages_ok(struct page *page, unsigned int order) */ static inline struct page * expand(struct zone *zone, struct page *page, - unsigned long index, int low, int high, struct free_area *area) + int low, int high, struct free_area *area) { unsigned long size = 1 << high; @@ -318,12 +377,13 @@ expand(struct zone *zone, struct page *page, size >>= 1; BUG_ON(bad_range(zone, &page[size])); list_add(&page[size].lru, &area->free_list); - MARK_USED(index + size, high, area); + area->nr_free++; + set_page_order(&page[size], high); } return page; } -static inline void set_page_refs(struct page *page, int order) +void set_page_refs(struct page *page, int order) { #ifdef CONFIG_MMU set_page_count(page, 1); @@ -333,9 +393,10 @@ static inline void set_page_refs(struct page *page, int order) /* * We need to reference all the pages for this order, otherwise if * anyone accesses one of the pages with (get/put) it will be freed. + * - eg: access_process_vm() */ for (i = 0; i < (1 << order); i++) - set_page_count(page+i, 1); + set_page_count(page + i, 1); #endif /* CONFIG_MMU */ } @@ -361,6 +422,7 @@ static void prep_new_page(struct page *page, int order) 1 << PG_checked | 1 << PG_mappedtodisk); page->private = 0; set_page_refs(page, order); + kernel_map_pages(page, 1 << order, 1); } /* @@ -372,7 +434,6 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order) struct free_area * area; unsigned int current_order; struct page *page; - unsigned int index; for (current_order = order; current_order < MAX_ORDER; ++current_order) { area = zone->free_area + current_order; @@ -381,11 +442,10 @@ static struct page *__rmqueue(struct zone *zone, unsigned int order) page = list_entry(area->free_list.next, struct page, lru); list_del(&page->lru); - index = page - zone->zone_mem_map; - if (current_order != MAX_ORDER-1) - MARK_USED(index, current_order, area); + rmv_page_order(page); + area->nr_free--; zone->free_pages -= 1UL << 
order; - return expand(zone, page, index, order, current_order, area); + return expand(zone, page, order, current_order, area); } return NULL; @@ -438,26 +498,30 @@ static void __drain_pages(unsigned int cpu) #endif /* CONFIG_PM || CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM -int is_head_of_free_region(struct page *page) + +void mark_free_pages(struct zone *zone) { - struct zone *zone = page_zone(page); - unsigned long flags; + unsigned long zone_pfn, flags; int order; struct list_head *curr; - /* - * Should not matter as we need quiescent system for - * suspend anyway, but... - */ + if (!zone->spanned_pages) + return; + spin_lock_irqsave(&zone->lock, flags); + for (zone_pfn = 0; zone_pfn < zone->spanned_pages; ++zone_pfn) + ClearPageNosaveFree(pfn_to_page(zone_pfn + zone->zone_start_pfn)); + for (order = MAX_ORDER - 1; order >= 0; --order) - list_for_each(curr, &zone->free_area[order].free_list) - if (page == list_entry(curr, struct page, lru)) { - spin_unlock_irqrestore(&zone->lock, flags); - return 1 << order; - } + list_for_each(curr, &zone->free_area[order].free_list) { + unsigned long start_pfn, i; + + start_pfn = page_to_pfn(list_entry(curr, struct page, lru)); + + for (i=0; i < (1<lock, flags); - return 0; } /* @@ -537,12 +601,20 @@ void fastcall free_cold_page(struct page *page) free_hot_cold_page(page, 1); } +static inline void prep_zero_page(struct page *page, int order, int gfp_flags) +{ + int i; + + BUG_ON((gfp_flags & (__GFP_WAIT | __GFP_HIGHMEM)) == __GFP_HIGHMEM); + for(i = 0; i < (1 << order); i++) + clear_highpage(page + i); +} + /* * Really, prep_compound_page() should be called from __rmqueue_bulk(). But * we cheat by calling it from here, in the order > 0 path. Saves a branch * or two. 
*/ - static struct page * buffered_rmqueue(struct zone *zone, int order, int gfp_flags) { @@ -577,42 +649,64 @@ buffered_rmqueue(struct zone *zone, int order, int gfp_flags) BUG_ON(bad_range(zone, page)); mod_page_state_zone(zone, pgalloc, 1 << order); prep_new_page(page, order); + + if (gfp_flags & __GFP_ZERO) + prep_zero_page(page, order, gfp_flags); + if (order && (gfp_flags & __GFP_COMP)) prep_compound_page(page, order); } return page; } +/* + * Return 1 if free pages are above 'mark'. This takes into account the order + * of the allocation. + */ +int zone_watermark_ok(struct zone *z, int order, unsigned long mark, + int classzone_idx, int can_try_harder, int gfp_high) +{ + /* free_pages my go negative - that's OK */ + long min = mark, free_pages = z->free_pages - (1 << order) + 1; + int o; + + if (gfp_high) + min -= min / 2; + if (can_try_harder) + min -= min / 4; + + if (free_pages <= min + z->lowmem_reserve[classzone_idx]) + return 0; + for (o = 0; o < order; o++) { + /* At the next order, this order's pages become unavailable */ + free_pages -= z->free_area[o].nr_free << o; + + /* Require fewer higher order pages to be free */ + min >>= 1; + + if (free_pages <= min) + return 0; + } + return 1; +} + /* * This is the 'heart' of the zoned buddy allocator. - * - * Herein lies the mysterious "incremental min". That's the - * - * local_low = z->pages_low; - * min += local_low; - * - * thing. The intent here is to provide additional protection to low zones for - * allocation requests which _could_ use higher zones. So a GFP_HIGHMEM - * request is not allowed to dip as deeply into the normal zone as a GFP_KERNEL - * request. This preserves additional space in those lower zones for requests - * which really do need memory from those zones. It means that on a decent - * sized machine, GFP_HIGHMEM and GFP_KERNEL requests basically leave the DMA - * zone untouched. 
*/ struct page * fastcall __alloc_pages(unsigned int gfp_mask, unsigned int order, struct zonelist *zonelist) { const int wait = gfp_mask & __GFP_WAIT; - unsigned long min; struct zone **zones, *z; struct page *page; struct reclaim_state reclaim_state; struct task_struct *p = current; int i; - int alloc_type; + int classzone_idx; int do_retry; int can_try_harder; + int did_some_progress; might_sleep_if(wait); @@ -630,13 +724,14 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, return NULL; } - alloc_type = zone_idx(zones[0]); + classzone_idx = zone_idx(zones[0]); + restart: /* Go through the zonelist once, looking for a zone with enough free */ for (i = 0; (z = zones[i]) != NULL; i++) { - min = z->pages_low + (1<protection[alloc_type]; - if (z->free_pages < min) + if (!zone_watermark_ok(z, order, z->pages_low, + classzone_idx, 0, 0)) continue; page = buffered_rmqueue(z, order, gfp_mask); @@ -645,21 +740,16 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, } for (i = 0; (z = zones[i]) != NULL; i++) - wakeup_kswapd(z); + wakeup_kswapd(z, order); /* * Go through the zonelist again. Let __GFP_HIGH and allocations * coming from realtime tasks to go deeper into reserves */ for (i = 0; (z = zones[i]) != NULL; i++) { - min = z->pages_min; - if (gfp_mask & __GFP_HIGH) - min /= 2; - if (can_try_harder) - min -= min / 4; - min += (1<protection[alloc_type]; - - if (z->free_pages < min) + if (!zone_watermark_ok(z, order, z->pages_min, + classzone_idx, can_try_harder, + gfp_mask & __GFP_HIGH)) continue; page = buffered_rmqueue(z, order, gfp_mask); @@ -668,7 +758,7 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, } /* This allocation should allow future memory freeing. 
*/ - if ((p->flags & (PF_MEMALLOC | PF_MEMDIE)) && !in_interrupt()) { + if (((p->flags & PF_MEMALLOC) || unlikely(test_thread_flag(TIF_MEMDIE))) && !in_interrupt()) { /* go through the zonelist yet again, ignoring mins */ for (i = 0; (z = zones[i]) != NULL; i++) { page = buffered_rmqueue(z, order, gfp_mask); @@ -683,31 +773,56 @@ __alloc_pages(unsigned int gfp_mask, unsigned int order, goto nopage; rebalance: + cond_resched(); + /* We now go into synchronous reclaim */ p->flags |= PF_MEMALLOC; reclaim_state.reclaimed_slab = 0; p->reclaim_state = &reclaim_state; - try_to_free_pages(zones, gfp_mask, order); + did_some_progress = try_to_free_pages(zones, gfp_mask, order); p->reclaim_state = NULL; p->flags &= ~PF_MEMALLOC; - /* go through the zonelist yet one more time */ - for (i = 0; (z = zones[i]) != NULL; i++) { - min = z->pages_min; - if (gfp_mask & __GFP_HIGH) - min /= 2; - if (can_try_harder) - min -= min / 4; - min += (1<protection[alloc_type]; - - if (z->free_pages < min) - continue; + cond_resched(); - page = buffered_rmqueue(z, order, gfp_mask); - if (page) - goto got_pg; + if (likely(did_some_progress)) { + /* + * Go through the zonelist yet one more time, keep + * very high watermark here, this is only to catch + * a parallel oom killing, we must fail if we're still + * under heavy pressure. + */ + for (i = 0; (z = zones[i]) != NULL; i++) { + if (!zone_watermark_ok(z, order, z->pages_min, + classzone_idx, can_try_harder, + gfp_mask & __GFP_HIGH)) + continue; + + page = buffered_rmqueue(z, order, gfp_mask); + if (page) + goto got_pg; + } + } else if ((gfp_mask & __GFP_FS) && !(gfp_mask & __GFP_NORETRY)) { + /* + * Go through the zonelist yet one more time, keep + * very high watermark here, this is only to catch + * a parallel oom killing, we must fail if we're still + * under heavy pressure. 
+ */ + for (i = 0; (z = zones[i]) != NULL; i++) { + if (!zone_watermark_ok(z, order, z->pages_high, + classzone_idx, 0, 0)) + continue; + + page = buffered_rmqueue(z, order, gfp_mask); + if (page) + goto got_pg; + } + + out_of_memory(gfp_mask); + goto restart; } /* @@ -739,7 +854,6 @@ nopage: return NULL; got_pg: zone_statistics(zonelist, z); - kernel_map_pages(page, 1 << order, 1); return page; } @@ -769,12 +883,9 @@ fastcall unsigned long get_zeroed_page(unsigned int gfp_mask) */ BUG_ON(gfp_mask & __GFP_HIGHMEM); - page = alloc_pages(gfp_mask, 0); - if (page) { - void *address = page_address(page); - clear_page(address); - return (unsigned long) address; - } + page = alloc_pages(gfp_mask | __GFP_ZERO, 0); + if (page) + return (unsigned long) page_address(page); return 0; } @@ -902,8 +1013,7 @@ static void show_node(struct zone *zone) * The result is unavoidably approximate - it can change * during and after execution of this function. */ -DEFINE_PER_CPU(struct page_state, page_states) = {0}; -EXPORT_PER_CPU_SYMBOL(page_states); +static DEFINE_PER_CPU(struct page_state, page_states) = {0}; atomic_t nr_pagecache = ATOMIC_INIT(0); EXPORT_SYMBOL(nr_pagecache); @@ -916,18 +1026,18 @@ void __get_page_state(struct page_state *ret, int nr) int cpu = 0; memset(ret, 0, sizeof(*ret)); + + cpu = first_cpu(cpu_online_map); while (cpu < NR_CPUS) { unsigned long *in, *out, off; - if (!cpu_possible(cpu)) { - cpu++; - continue; - } - in = (unsigned long *)&per_cpu(page_states, cpu); - cpu++; - if (cpu < NR_CPUS && cpu_possible(cpu)) + + cpu = next_cpu(cpu, cpu_online_map); + + if (cpu < NR_CPUS) prefetch(&per_cpu(page_states, cpu)); + out = (unsigned long *)ret; for (off = 0; off < nr; off++) *out++ += *in++; @@ -954,18 +1064,28 @@ unsigned long __read_page_state(unsigned offset) unsigned long ret = 0; int cpu; - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for_each_online_cpu(cpu) { unsigned long in; - if (!cpu_possible(cpu)) - continue; - in = (unsigned long)&per_cpu(page_states, 
cpu) + offset; ret += *((unsigned long *)in); } return ret; } +void __mod_page_state(unsigned offset, unsigned long delta) +{ + unsigned long flags; + void* ptr; + + local_irq_save(flags); + ptr = &__get_cpu_var(page_states); + *(unsigned long*)(ptr + offset) += delta; + local_irq_restore(flags); +} + +EXPORT_SYMBOL(__mod_page_state); + void __get_zone_counts(unsigned long *active, unsigned long *inactive, unsigned long *free, struct pglist_data *pgdat) { @@ -1119,14 +1239,13 @@ void show_free_areas(void) zone->pages_scanned, (zone->all_unreclaimable ? "yes" : "no") ); - printk("protections[]:"); + printk("lowmem_reserve[]:"); for (i = 0; i < MAX_NR_ZONES; i++) - printk(" %lu", zone->protection[i]); + printk(" %lu", zone->lowmem_reserve[i]); printk("\n"); } for_each_zone(zone) { - struct list_head *elem; unsigned long nr, flags, order, total = 0; show_node(zone); @@ -1138,9 +1257,7 @@ void show_free_areas(void) spin_lock_irqsave(&zone->lock, flags); for (order = 0; order < MAX_ORDER; order++) { - nr = 0; - list_for_each(elem, &zone->free_area[order].free_list) - ++nr; + nr = zone->free_area[order].nr_free; total += nr << order; printk("%lu*%lukB ", nr, K(1UL) << order); } @@ -1182,13 +1299,13 @@ static int __init build_zonelists_node(pg_data_t *pgdat, struct zonelist *zoneli } #ifdef CONFIG_NUMA -#define MAX_NODE_LOAD (numnodes) +#define MAX_NODE_LOAD (num_online_nodes()) static int __initdata node_load[MAX_NUMNODES]; /** * find_next_best_node - find the next node that should appear in a given * node's fallback list * @node: node whose fallback list we're appending - * @used_node_mask: pointer to the bitmap of already used nodes + * @used_node_mask: nodemask_t of already used nodes * * We use a number of factors to determine which is the next node that should * appear on a given node's fallback list. The node should not have appeared @@ -1199,24 +1316,24 @@ static int __initdata node_load[MAX_NUMNODES]; * on them otherwise. * It returns -1 if no node is found. 
*/ -static int __init find_next_best_node(int node, void *used_node_mask) +static int __init find_next_best_node(int node, nodemask_t *used_node_mask) { int i, n, val; int min_val = INT_MAX; int best_node = -1; - for (i = 0; i < numnodes; i++) { + for_each_online_node(i) { cpumask_t tmp; /* Start from local node */ - n = (node+i)%numnodes; + n = (node+i) % num_online_nodes(); /* Don't want a node to appear more than once */ - if (test_bit(n, used_node_mask)) + if (node_isset(n, *used_node_mask)) continue; /* Use the local node if we haven't already */ - if (!test_bit(node, used_node_mask)) { + if (!node_isset(node, *used_node_mask)) { best_node = node; break; } @@ -1240,7 +1357,7 @@ static int __init find_next_best_node(int node, void *used_node_mask) } if (best_node >= 0) - set_bit(best_node, used_node_mask); + node_set(best_node, *used_node_mask); return best_node; } @@ -1250,7 +1367,7 @@ static void __init build_zonelists(pg_data_t *pgdat) int i, j, k, node, local_node; int prev_node, load; struct zonelist *zonelist; - DECLARE_BITMAP(used_mask, MAX_NUMNODES); + nodemask_t used_mask; /* initialize zonelists */ for (i = 0; i < GFP_ZONETYPES; i++) { @@ -1261,10 +1378,10 @@ static void __init build_zonelists(pg_data_t *pgdat) /* NUMA-aware ordering of nodes */ local_node = pgdat->node_id; - load = numnodes; + load = num_online_nodes(); prev_node = local_node; - bitmap_zero(used_mask, MAX_NUMNODES); - while ((node = find_next_best_node(local_node, used_mask)) >= 0) { + nodes_clear(used_mask); + while ((node = find_next_best_node(local_node, &used_mask)) >= 0) { /* * We don't want to pressure a particular node. 
* So adding penalty to the first node in same @@ -1320,11 +1437,17 @@ static void __init build_zonelists(pg_data_t *pgdat) * zones coming right after the local ones are those from * node N+1 (modulo N) */ - for (node = local_node + 1; node < numnodes; node++) - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); - for (node = 0; node < local_node; node++) - j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); - + for (node = local_node + 1; node < MAX_NUMNODES; node++) { + if (!node_online(node)) + continue; + j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + } + for (node = 0; node < local_node; node++) { + if (!node_online(node)) + continue; + j = build_zonelists_node(NODE_DATA(node), zonelist, j, k); + } + zonelist->zones[j] = NULL; } } @@ -1335,9 +1458,9 @@ void __init build_all_zonelists(void) { int i; - for(i = 0 ; i < numnodes ; i++) + for_each_online_node(i) build_zonelists(NODE_DATA(i)); - printk("Built %i zonelists\n", numnodes); + printk("Built %i zonelists\n", num_online_nodes()); } /* @@ -1429,49 +1552,13 @@ void __init memmap_init_zone(unsigned long size, int nid, unsigned long zone, } } -/* - * Page buddy system uses "index >> (i+1)", where "index" is - * at most "size-1". - * - * The extra "+3" is to round down to byte size (8 bits per byte - * assumption). Thus we get "(size-1) >> (i+4)" as the last byte - * we can access. - * - * The "+1" is because we want to round the byte allocation up - * rather than down. So we should have had a "+7" before we shifted - * down by three. Also, we have to add one as we actually _use_ the - * last bit (it's [0,n] inclusive, not [0,n[). - * - * So we actually had +7+1 before we shift down by 3. But - * (n+8) >> 3 == (n >> 3) + 1 (modulo overflows, which we do not have). - * - * Finally, we LONG_ALIGN because all bitmap operations are on longs. 
- */ -unsigned long pages_to_bitmap_size(unsigned long order, unsigned long nr_pages) -{ - unsigned long bitmap_size; - - bitmap_size = (nr_pages-1) >> (order+4); - bitmap_size = LONG_ALIGN(bitmap_size+1); - - return bitmap_size; -} - -void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, unsigned long size) +void zone_init_free_lists(struct pglist_data *pgdat, struct zone *zone, + unsigned long size) { int order; - for (order = 0; ; order++) { - unsigned long bitmap_size; - + for (order = 0; order < MAX_ORDER ; order++) { INIT_LIST_HEAD(&zone->free_area[order].free_list); - if (order == MAX_ORDER-1) { - zone->free_area[order].map = NULL; - break; - } - - bitmap_size = pages_to_bitmap_size(order, size); - zone->free_area[order].map = - (unsigned long *) alloc_bootmem_node(pgdat, bitmap_size); + zone->free_area[order].nr_free = 0; } } @@ -1496,6 +1583,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat, pgdat->nr_zones = 0; init_waitqueue_head(&pgdat->kswapd_wait); + pgdat->kswapd_max_order = 0; for (j = 0; j < MAX_NR_ZONES; j++) { struct zone *zone = pgdat->node_zones + j; @@ -1583,7 +1671,7 @@ static void __init free_area_init_core(struct pglist_data *pgdat, zone->zone_start_pfn = zone_start_pfn; if ((zone_start_pfn) & (zone_required_alignment-1)) - printk("BUG: wrong zone alignment, it will crash\n"); + printk(KERN_CRIT "BUG: wrong zone alignment, it will crash\n"); memmap_init(size, nid, j, zone_start_pfn); @@ -1659,8 +1747,7 @@ static void frag_stop(struct seq_file *m, void *arg) } /* - * This walks the freelist for each zone. Whilst this is slow, I'd rather - * be slow here than slow down the fast path by keeping stats - mjbligh + * This walks the free areas for each zone. 
*/ static int frag_show(struct seq_file *m, void *arg) { @@ -1676,14 +1763,8 @@ static int frag_show(struct seq_file *m, void *arg) spin_lock_irqsave(&zone->lock, flags); seq_printf(m, "Node %d, zone %8s ", pgdat->node_id, zone->name); - for (order = 0; order < MAX_ORDER; ++order) { - unsigned long nr_bufs = 0; - struct list_head *elem; - - list_for_each(elem, &(zone->free_area[order].free_list)) - ++nr_bufs; - seq_printf(m, "%6lu ", nr_bufs); - } + for (order = 0; order < MAX_ORDER; ++order) + seq_printf(m, "%6lu ", zone->free_area[order].nr_free); spin_unlock_irqrestore(&zone->lock, flags); seq_putc(m, '\n'); } @@ -1799,14 +1880,28 @@ static int page_alloc_cpu_notify(struct notifier_block *self, { int cpu = (unsigned long)hcpu; long *count; + unsigned long *src, *dest; if (action == CPU_DEAD) { + int i; + /* Drain local pagecache count. */ count = &per_cpu(nr_pagecache_local, cpu); atomic_add(*count, &nr_pagecache); *count = 0; local_irq_disable(); __drain_pages(cpu); + + /* Add dead cpu's page_states to our own. 
*/ + dest = (unsigned long *)&__get_cpu_var(page_states); + src = (unsigned long *)&per_cpu(page_states, cpu); + + for (i = 0; i < sizeof(struct page_state)/sizeof(unsigned long); + i++) { + dest[i] += src[i]; + src[i] = 0; + } + local_irq_enable(); } return NOTIFY_OK; @@ -1818,87 +1913,29 @@ void __init page_alloc_init(void) hotcpu_notifier(page_alloc_cpu_notify, 0); } -static unsigned long higherzone_val(struct zone *z, int max_zone, - int alloc_type) -{ - int z_idx = zone_idx(z); - struct zone *higherzone; - unsigned long pages; - - /* there is no higher zone to get a contribution from */ - if (z_idx == MAX_NR_ZONES-1) - return 0; - - higherzone = &z->zone_pgdat->node_zones[z_idx+1]; - - /* We always start with the higher zone's protection value */ - pages = higherzone->protection[alloc_type]; - - /* - * We get a lower-zone-protection contribution only if there are - * pages in the higher zone and if we're not the highest zone - * in the current zonelist. e.g., never happens for GFP_DMA. Happens - * only for ZONE_DMA in a GFP_KERNEL allocation and happens for ZONE_DMA - * and ZONE_NORMAL for a GFP_HIGHMEM allocation. - */ - if (higherzone->present_pages && z_idx < alloc_type) - pages += higherzone->pages_low * sysctl_lower_zone_protection; - - return pages; -} - /* - * setup_per_zone_protection - called whenver min_free_kbytes or - * sysctl_lower_zone_protection changes. Ensures that each zone - * has a correct pages_protected value, so an adequate number of + * setup_per_zone_lowmem_reserve - called whenever + * sysctl_lower_zone_reserve_ratio changes. Ensures that each zone + * has a correct pages reserved value, so an adequate number of * pages are left in the zone after a successful __alloc_pages(). - * - * This algorithm is way confusing. I tries to keep the same behavior - * as we had with the incremental min iterative algorithm. 
*/ -static void setup_per_zone_protection(void) +static void setup_per_zone_lowmem_reserve(void) { struct pglist_data *pgdat; - struct zone *zones, *zone; - int max_zone; - int i, j; + int j, idx; for_each_pgdat(pgdat) { - zones = pgdat->node_zones; + for (j = 0; j < MAX_NR_ZONES; j++) { + struct zone * zone = pgdat->node_zones + j; + unsigned long present_pages = zone->present_pages; - for (i = 0, max_zone = 0; i < MAX_NR_ZONES; i++) - if (zones[i].present_pages) - max_zone = i; + zone->lowmem_reserve[j] = 0; - /* - * For each of the different allocation types: - * GFP_DMA -> GFP_KERNEL -> GFP_HIGHMEM - */ - for (i = 0; i < GFP_ZONETYPES; i++) { - /* - * For each of the zones: - * ZONE_HIGHMEM -> ZONE_NORMAL -> ZONE_DMA - */ - for (j = MAX_NR_ZONES-1; j >= 0; j--) { - zone = &zones[j]; - - /* - * We never protect zones that don't have memory - * in them (j>max_zone) or zones that aren't in - * the zonelists for a certain type of - * allocation (j>=i). We have to assign these - * to zero because the lower zones take - * contributions from the higher zones. 
- */ - if (j > max_zone || j >= i) { - zone->protection[i] = 0; - continue; - } - /* - * The contribution of the next higher zone - */ - zone->protection[i] = higherzone_val(zone, - max_zone, i); + for (idx = j-1; idx >= 0; idx--) { + struct zone * lower_zone = pgdat->node_zones + idx; + + lower_zone->lowmem_reserve[j] = present_pages / sysctl_lowmem_reserve_ratio[idx]; + present_pages += lower_zone->present_pages; } } } @@ -1993,7 +2030,7 @@ static int __init init_per_zone_pages_min(void) if (min_free_kbytes > 65536) min_free_kbytes = 65536; setup_per_zone_pages_min(); - setup_per_zone_protection(); + setup_per_zone_lowmem_reserve(); return 0; } module_init(init_per_zone_pages_min) @@ -2008,44 +2045,62 @@ int min_free_kbytes_sysctl_handler(ctl_table *table, int write, { proc_dointvec(table, write, file, buffer, length, ppos); setup_per_zone_pages_min(); - setup_per_zone_protection(); return 0; } /* - * lower_zone_protection_sysctl_handler - just a wrapper around - * proc_dointvec() so that we can call setup_per_zone_protection() - * whenever sysctl_lower_zone_protection changes. + * lowmem_reserve_ratio_sysctl_handler - just a wrapper around + * proc_dointvec() so that we can call setup_per_zone_lowmem_reserve() + * whenever sysctl_lowmem_reserve_ratio changes. + * + * The reserve ratio obviously has absolutely no relation with the + * pages_min watermarks. The lowmem reserve ratio can only make sense + * if in function of the boot time zone sizes. 
*/ -int lower_zone_protection_sysctl_handler(ctl_table *table, int write, +int lowmem_reserve_ratio_sysctl_handler(ctl_table *table, int write, struct file *file, void __user *buffer, size_t *length, loff_t *ppos) { proc_dointvec_minmax(table, write, file, buffer, length, ppos); - setup_per_zone_protection(); + setup_per_zone_lowmem_reserve(); return 0; } +__initdata int hashdist = HASHDIST_DEFAULT; + +#ifdef CONFIG_NUMA +static int __init set_hashdist(char *str) +{ + if (!str) + return 0; + hashdist = simple_strtoul(str, &str, 0); + return 1; +} +__setup("hashdist=", set_hashdist); +#endif + /* * allocate a large system hash table from bootmem * - it is assumed that the hash table must contain an exact power-of-2 * quantity of entries + * - limit is the number of hash buckets, not the total allocation size */ void *__init alloc_large_system_hash(const char *tablename, unsigned long bucketsize, unsigned long numentries, int scale, - int consider_highmem, + int flags, unsigned int *_hash_shift, - unsigned int *_hash_mask) + unsigned int *_hash_mask, + unsigned long limit) { - unsigned long long max; + unsigned long long max = limit; unsigned long log2qty, size; - void *table; + void *table = NULL; /* allow the kernel cmdline to have a say */ if (!numentries) { /* round applicable memory size up to nearest megabyte */ - numentries = consider_highmem ? nr_all_pages : nr_kernel_pages; + numentries = (flags & HASH_HIGHMEM) ? 
nr_all_pages : nr_kernel_pages; numentries += (1UL << (20 - PAGE_SHIFT)) - 1; numentries >>= 20 - PAGE_SHIFT; numentries <<= 20 - PAGE_SHIFT; @@ -2059,9 +2114,11 @@ void *__init alloc_large_system_hash(const char *tablename, /* rounded up to nearest power of 2 in size */ numentries = 1UL << (long_log2(numentries) + 1); - /* limit allocation size to 1/16 total memory */ - max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4; - do_div(max, bucketsize); + /* limit allocation size to 1/16 total memory by default */ + if (max == 0) { + max = ((unsigned long long)nr_all_pages << PAGE_SHIFT) >> 4; + do_div(max, bucketsize); + } if (numentries > max) numentries = max; @@ -2070,7 +2127,16 @@ void *__init alloc_large_system_hash(const char *tablename, do { size = bucketsize << log2qty; - table = alloc_bootmem(size); + if (flags & HASH_EARLY) + table = alloc_bootmem(size); + else if (hashdist) + table = __vmalloc(size, GFP_ATOMIC, PAGE_KERNEL); + else { + unsigned long order; + for (order = 0; ((1UL << order) << PAGE_SHIFT) < size; order++) + ; + table = (void*) __get_free_pages(GFP_ATOMIC, order); + } } while (!table && size > PAGE_SIZE && --log2qty); if (!table) -- 2.30.2